1 Raw data

data <- read.csv("/home/bambrozi/2_CORTISOL/Data/T4_Elora_Data_04_25_2024.csv")

# Replace "treated" with NA
data$T4Cortisol[data$T4Cortisol == "treated" | data$T4Cortisol == "Treated at T2" | data$T4Cortisol == "treated at T2"] <- NA
# Convert the column to numeric, coercing non-numeric values to NA
data$T4Cortisol <- as.numeric(as.character(data$T4Cortisol))
#Filtering only the lines with values
data <- data[!is.na(data$T4Cortisol),]
#creating new data file cleaned  
write.csv(data, "/home/bambrozi/2_CORTISOL/Data/data_clean.csv", row.names = F)

print(data)

2 Continuous Phenotype

# Summary Statistics
summary(data$T4Cortisol)
# Histogram
hist(data$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
# Boxplot
boxplot(data$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
# Density Plot
plot(density(data$T4Cortisol), main = "Density Plot of T4 Cortisol", xlab = "T4 Cortisol", ylab = "Density")
# Calculate the theoretical quantiles
qqnorm(data$T4Cortisol, main = "QQ Plot of T4Cortisol", xlim = c(min(qqnorm(data$T4Cortisol)$x), max(qqnorm(data$T4Cortisol)$x)), ylim = c(min(qqnorm(data$T4Cortisol)$y), max(qqnorm(data$T4Cortisol)$y) + 2 * IQR(qqnorm(data$T4Cortisol)$y)))
# Add the QQ line
qqline(data$T4Cortisol, col = "red")

Summary statistics My Image

Histogram My Image

Density My Image

Box_Plot My Image

qq_Plot My Image

Shapiro-Wilk normality test My Image

2.1 Categorical Phenotype

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

data$Categorical <- ifelse(data$T4Cortisol >= 956, "H", 
                           ifelse(data$T4Cortisol <= 190.8, "L", "M"))

table(data$Categorical)
library(ggplot2)

# Reorder the levels of the 'Categorical' column
data$Categorical <- factor(data$Categorical, levels = c("L", "M", "H"))

# Create the histogram with reordered categories
ggplot(data, aes(x = Categorical, fill = Categorical)) +
  geom_bar() +
  labs(title = "Histogram of T4Cortisol by Category",
       x = "Category",
       y = "Frequency") +
  theme_minimal()

# Create the histogram
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_histogram(binwidth = 50, color = "black", alpha = 0.7) + # Adjust binwidth as needed
  labs(title = "Histogram of T4Cortisol with Color by Category",
       x = "T4 Cortisol",
       y = "Frequency",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create the density plot
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
  geom_density(alpha = 0.3) +
  labs(title = "Density Plot of T4Cortisol with Color by Category",
       x = "T4Cortisol",
       y = "Density",
       fill = "Category") +
  scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
  theme_minimal()

# Create a density plot
ggplot(data, aes(x = T4Cortisol)) +
  geom_density() +
  geom_vline(xintercept = c(956, 190.8), linetype = "dashed", color = "red") +
  labs(title = "Density Plot of T4Cortisol with Vertical Lines",
       x = "T4Cortisol",
       y = "Density") +
  theme_minimal()

The animals were sorted in these three categories >H = Hight >M = Medium >L = Low

My Image

The individuals frequency distribution in theese categories are shown in the plots below

My Image My Image My Image My Image

2.2 Removing “outliers”

Observing the previous plots I tried to remove the “outliers” phenotypes above 1250, but the outcome from Shapiro test is still indicating no normality of the data.

library(tidyverse)

data_no_out <- filter(data, T4Cortisol < 1250)

# Create QQ plot
qqnorm(data_no_out$T4Cortisol, main = "QQ Plot of T4Cortisol", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(data$T4Cortisol, col = "red")

boxplot(data_no_out$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")

hist(data_no_out$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")

shapiro.test(data$T4Cortisol)

My Image My Image My Image My Image

3 GENOTYPES

Lucas Alcântara sent me the path to the genotype and pedigree files: /data/cgil/daiclu/6_Data/database/public_output/bruno

My Image

In this folder we found the following files:

I made a copy of this files in a folder called Raw_files:

/home/bambrozi/2_CORTISOL/RawFiles

This directory has two sub-directories:

4 GWAS

4.1 Checking with Phenotyped Animals also have Genotype

library(data.table)

pheno <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
ped <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
geno <- fread("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
geno <- geno[,c("V2")]

#Bringing cdn_id to my phenotype file
#Generate a index with the match
matching_indices <- match(pheno$ID, ped$elora_id)
# Then, assign 'cdn_id' from 'ped' to 'pheno' where there are matches
pheno$cdn_id <- ifelse(!is.na(matching_indices), ped$cdn_id[matching_indices], NA)

#Making a phenotype file only with genotyped animals
pheno_genotyped <- pheno[pheno$cdn_id %in% geno$V2,] 

#check if all animals in this file are genotyped
checkk <- pheno_genotyped$cdn_id %in% geno$V2
sum(checkk)

4.2 Generating a Phenotype file

The phenotype file should have three columns: FID, Animal_id, Phenotype

HO <- rep("HO", 252)

pheno_gwas <- as.data.frame(cbind(HO, pheno_genotyped$cdn_id, pheno_genotyped$T4Cortisol))

colnames(pheno_gwas) <- c("FID", "cdn_id", "cortisol")

pheno_gwas$cdn_id <-  as.numeric(pheno_gwas$cdn_id)
pheno_gwas$cortisol <- round(as.numeric(pheno_gwas$cortisol),2)

write.table(pheno_gwas, "/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", quote = F, row.names = F, col.names = T)

4.3 Adjusting the SNP_map to .map

map <- fread("/data/cgil/daiclu/6_Data/database/public_output/bruno/DGVsnpinfo.2404.ho")
morgan <- data.frame(X0 = rep(0, 45101))
mapa=as.data.frame(cbind(map$chr, map$snp_name, morgan$X0, map$location))
head(mapa)
write.table(x = mapa, file = "/home/bambrozi/2_CORTISOL/Geno_files/genoplink.map", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)

4.4 Generating the bfiles

system("/home/local/bin/plink --cow --nonfounders --allow-no-sex --keep-allele-order --file /home/bambrozi/2_CORTISOL/Geno_files/genoplink --make-bed --out /home/bambrozi/2_CORTISOL/Geno_files/genoplink")
With the code above I generated the bfiles:
    genoplink.bed
  • genoplink.bim
  • genoplink.fam
  • genoplink.log
  • genoplink.nosex

5 Quality Control

We ran the code below to perfom the QC

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files/genoplink
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean

/home/local/bin/plink \
    --bfile ${DATA} \
    --cow \
    --allow-no-sex \
    --hwe 1e-5 \
    --maf 0.01 \
    --geno 0.1 \
    --mind 0.1 \
    --keep-allele-order \
    --make-bed \
    --out ${RESULT}
    

The server screen outcome is shown below. My Image

After the Quality Control we ended up with

6 KING

To check for duplicated individuals I performed the KINSHIP analysis using one script from Larissa Braga. Running the King Analysis on Plink.

#!/bin/bash

DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/ONLY_GRASSHILL_AND_PHENO_after_QC/only_grasshill_and_pheno_clean
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/KING/result_king

plink2 \
    --bfile ${DATA} \
    --chr-set 29 \
    --make-king-table \
    --out ${RESULT}

This is the output screen on terminal:

My Image

The table below is the output /home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king.kin0 and have pairwise comparisons between all individuals.

My Image

Now we should open in R and check for individuals with more than 0,354, to perform this we can use the code below, also provided by Larissa Braga:

setwd("/home/bambrozi/2_CORTISOL/Geno_files_after_KING")

relatedness="result_king.kin0" ## change accordingly!!

library(data.table)

print(relatedness)
rel=fread(relatedness, h = T)
head(rel)

print("Individuals with different identifications above the cut off of 0.354:")
dup=subset(rel, KINSHIP >= 0.354  & IID1!=IID2)
print(dup)
nrow(dup)

So the code above will provide this output if there is any duplicated individual.

My Image

We do not have any duplicated individual

So the file to be used are those in the directory /home/bambrozi/2_CORTISOL/Geno_files_after_QC

files:genoplink_clean

After Quality Control we didn’t lost any animal, so we don’t need to update our phenotype file

7 PCA

Now before performing the PCA analysis we need to change the FID for those individuals that has phenotype = 1 for Nadia.

#!/bin/bash

DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/imput_pca
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/pca_result

plink \
    --bfile ${DATA} \
    --keep-allele-order \
    --chr-set 29 \
    --pca \
    --out ${RESULT}

The PCA output:

My Image

7.1 PCA Plot

After generate the Eigenvalues and Eigenvectors I used the code below to generate the PCA Plot

setwd("/home/bambrozi/2_CORTISOL/PCA")

library(ggplot2) 
library(tidyverse)

##
# Visualize PCA results
###

# read in result files
eigenValues <- read_delim("pca_result.eigenval", delim = " ", col_names = F)
eigenVectors <- read_delim("pca_result.eigenvec", delim = " ", col_names = F)

## Proportion of variation captured by each vector
eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)


# PCA plot
png("pca-plink.eng.png", width=5, height=5, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 1, alpha = 1) +
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()


# PCA plot with animal ids
png("pca-plink.eng.animals_id.png", width=50, height=50, units="in", res=300)
ggplot(data = eigenVectors) +
  geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 5, alpha = 1) +
  geom_text(mapping = aes(x = X3, y = X4, label = X2), size = 2, hjust = 0, vjust = 0) +  # Add labels for animal IDs
  geom_hline(yintercept = 0, linetype="dotted") +
  geom_vline(xintercept = 0, linetype="dotted") +
  labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
       y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
  theme_minimal()
dev.off()

My Image

8 GWAS on GCTA

Previously we have performed GWAS on GCTA:

9 GWAS - EXTREME PHENO - WITH BY and SD

9.1 Data preparation

I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.

My Image

pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)


# Create an matrix with fixed effects with only those animals which also have phenotype and genotype
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]

# Now we are gona remove the intermediary animals from pheno object
pheno$Categorical <- ifelse(pheno$cortisol >= 956, "H", 
                           ifelse(pheno$cortisol <= 190.8, "L", "M"))
table(pheno$Categorical)
pheno <- pheno[pheno$Categorical != "M", ]
pheno <- pheno[, c("FID", "cdn_id", "cortisol")]

# Now we are going to remove from fixeff the animals which are not in pheno
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id,]

#Checking if match the animals and order
identical(fixeff$cdn_id, pheno$cdn_id)

#Creating a file with animals to keep in the genotype file, we will use it on Plink
to_keep_geno <- pheno[, c("FID", "cdn_id")]

write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt", quote = F, row.names = F, col.names = T)
write.table(pheno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt", quote = F, row.names = F, col.names = T)
write.table(to_keep_geno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt", quote = F, row.names = F, col.names = F)

We ended up with
H (Hight) = 34 animals
L (Low) = 37 animals Total = 71 animals

On Plink we will remove all individuals from genotype files that are classified as Medium, keeping only the High and Low

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
KEEP=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt

plink2 --bfile ${DATA} --keep ${KEEP} --chr-set 29 --make-bed --out ${RESULT}

9.2 GWAS on GCTA

#!/bin/bash

DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt

/home/local/bin/gcta \
    --bfile ${DATA} \
    --mlma-loco \
    --pheno ${PHENO} \
    --qcovar ${FIXEFF} \
    --autosome-num 29 \
    --autosome \
    --out ${RESULT}

After ran the GWAS above I got the following message from the GCTA:

Error: analysis stopped because more than half of the variance components are constrained. The result would be unreliable. You may have a try of the option –reml-no-constrain.

As we got this error message, we needed to solve this problem, and for that we used the whole sample size (252 individuals) to estimate the variance components, and after this, using this variance components from the whole sample we performed the ssGWAS with the subset of individuals (34 High + 37 Low = 71), but this was not possible in GCTA so we switched to another software (BLUPF90+)

9.3 BLUPF90+ GWAS

To run ssGWAS on Blupf90+ suite, we will need 4 different softwares:

  • renum: just to renumerate the files and generate the renf90.par
  • preGSF90: just to perfome a quality control with different parameter from the default.
  • blupf90+: used to estimate VCE and generate Ainv and Ginv
  • postGSF90: perform GWAS

The tutorial for preGSF90 and postGSF90 we can find in the link bellow https://nce.ads.uga.edu/wiki/doku.php?id=readme.pregsf90#gwas_options_postgsf90

According to the BLUPF90+ tutorial:

ssGWAS is an iterative procedure with 2 steps:
0) Quality control
1) prediction of GEBV with ssGBLUP
2) prediction of SNP marker effects based on the GEBV

9.3.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • preGSf90 executable file
  • postGSf90 executable file
  • Parameter file

      9.3.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Phenotype
      THIRD COLUMN = Fixed Effect 1
      FOURTH COLUMN = Fixed Effect 2

      First we are going to generate a Phenotype_Fixed_Effect file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      To get in one file these four columns we need the following code:

      fixeff <- read.table("/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt", header = T)
      pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
      ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
      
      fixeff <- fixeff[, c("cdn_id", "BIRTH_YEAR", "Sampling_date")]
      pheno <- pheno[,c("cdn_id", "cortisol")]
      
      # Load necessary libraries
      library(dplyr)
      
      # Merge pheno and fixeff data frames
      merged_data <- pheno %>% 
        left_join(fixeff, by = "cdn_id")
      
      
      merged_data$iid <- ids_eq$iid[match(merged_data$cdn_id, ids_eq$cdn_id)]
      
      merged_data <- merged_data[, c("iid", "cortisol", "BIRTH_YEAR", "Sampling_date")]
      
      write.table(merged_data, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt", col.names = F, quote = F, row.names = F)

      The file should be saved as text file, with separation by space and no columns names.

      PS: if there are any NA, they sould be replaced by 9999

      9.3.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID
      SECOND COLUMN = Sire ID
      THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      # to replace comma for space in the .csv file with the equivalence among IDs
      sed -i 's/,/ /g' bruno_ids.csv

      9.3.1.3 Genotype file

      First we are going to generate a Genotype file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid

      Below we can find the file’s location from the code above: /home/bambrozi/2_CORTISOL/RawFiles/Genotypes/bruno_gntps.txt /home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/bruno_gntps_iid

9.3.2 Download the executable files

Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
  • BlupF90+ = we will use to estimate the Variance components and GEBVs
  • renumF90 = we will use to renumerate the files
  • preGSf90 = we will use to perform the Quality control
  • postGSf90 = we will use for GWAS

9.3.3 SNP MAP

mapfile <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/DGVsnpinfo.2404.ho", header = T)

colnames(mapfile)

colnames(mapfile) <- c("SNP_ID", "CHR", "POS")

mapfile <- mapfile[,c("CHR", "POS", "SNP_ID")]

write.table(mapfile, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/snpmap.txt", col.names = T, row.names = F, quote = F)

9.3.4 Parameter file for Quality Control

But, before running the GEBV we will first perform one additional step to “CLEAN” our genotypes. Actually BLUPF90 by default perform a data cleaning with pre set parameters, but as HWE is not used by default, we will perform this additional step.

The Parameter card for this step is the parameter bellow:

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renum_QC.par


DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
1.0
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid
PED_DEPTH
0
(CO)VARIANCES
1.0
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
  • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
  • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
  • FIELDS_PASSED TO OUTPUT:
  • WEIGHT (S):
  • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
  • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
  • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
  • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
  • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
  • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
  • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
  • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
  • OPTION outcallrate: Save the call rate information on SNP markers in the file.
  • OPTION saveCleanSNPs: This option generates 4 new files. We assume snpfile as a marker file.
    • snpfile_clean = new SNP marker file.
    • snpfile_clean_XrefID = new cross-reference file.
    • snpfile_SNPs_removed = a list of removed markers.
    • snpfile_Animals_removed = a list of removed animals.
  • OPTION minfreq 0.01: Minimum allele frequency to retain the marker.
  • OPTION map_file snpmap.txt: This option will upload the SNP MAP
  • OPTION excludeCHR 30 31: This option will remove sexual chromosome that is the 30 and 31

To run any softwere from Blupf90 suit we will perform always in this way:

  1. Go to the server you wanna run this analysis, for instance, grand
ssh grand
  1. Now go to the directory you have created to run this analysis where that set of files are placed.
cd /home/bambrozi/2_CORTISOL/GWAS/BLUPF90
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will ask you the name of your parameter card, for this step is renum_QC.par.

The command above will generate couple files, among them renf90.par

We modified renf90.par in:
  • renf90_DataClean.par
  • renf90_VarCompEst.par

The parameter card to perform the Quality Control is: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_DataClean.par

It will be run using the software pre preGS90 to generate the Clean Genotype and SNP_MAP files after Quality Control.

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

The second parameter card used for Variance Components Estimation (VCE) is the following: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_VarCompEst.par

It will be run using the software pre blupf90+ to generate the VCE.

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        23 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   1.0000    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   1.0000    
OPTION SNP_file bruno_gntps_iid_clean
OPTION no_quality_control
OPTION method VCE
OPTION origID
OPTION missing 9999
OPTION se_covar_function H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
  • OPTION SNP_file bruno_gntps_iid_clean: we are going to inform the genotype file generated in the previous step (the Quality Control). Blup will create an file with the same name that the original genotype file, and add the sufix **_clean**
  • OPTION no_quality_control we need to set up this option because we performed Quality Control in the previous step and now we don’t need that Blupf90+ perform again. Blupf90+ by default perform quality control, so if we do not want, we need to specify.
  • OPTION method: VCE (Variance Component Estimation).
  • OPTION OrigID: this will keep the original ID informed.
  • OPTION missing 9999: you are informing that missing values will appear as 9999
  • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
    • H2_1: the name that your function will appear on the output files.
    • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
    • **_1_1**: this effect is in the 1st column.
    • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

In the parameter card above, we remove the option for Quality Control and added options for Variance Components Estimation, for Missing data, for origID and for heritability estimation, but the MOST IMPORTANT PART is we need to change the OPTION SNP_FILE, replacing the original genotype file, for the “clean” version generated in the previous step.

The Variance Components will be placed in the file: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/blupf90.log

blupf90.log
My Image

Now you should update you renf90_VarCompEst.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90_VarCompEst.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90_VarCompEst.par (CO) VARIANCE

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

blupf90.log
My Image

Parameter card
My Image

Now that we have the Variance components we go for the next step:
  • Prediction of SNP marker effects based on the GEBV
  • GWAS for High and Low cortisol animals
To do this, I’ll:
  • generate the Phenotype_FixEff file like bellow
  • generate the genotype file like bellow
  • Perform the QUALITY CONTROL for the new (71 samples) genotype file
  • Add the VCE from the 252 data set in the parameter card
  • Generate the GEBV
  • Run GWAS

9.3.5 Prediction of SNP marker effects based on the GEBV

9.3.6 GWAS for High and Low cortisol animals

Now we’ll run a new analysis using the Variance Components Estimation from the previous step to perform the GWAS.

To perform this first we need a Phenotype file, and a Genotype file only with the 71 animals (High=34 and Low=37)

9.3.6.1 Updating the files

9.3.6.1.1 Phenotype and Fixed Effects files

I used the code bellow to remove individuals with MEDIUM cortisol Phenotype

phenofix <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt")


# Now we are gona remove the intermediary animals from pheno object
phenofix$Categorical <- ifelse(phenofix$V2 >= 956, "H", 
                            ifelse(phenofix$V2 <= 190.8, "L", "M"))
table(phenofix$Categorical)

phenofix <- phenofix[phenofix$Categorical != "M", ]

phenofix <- phenofix[, c("V1", "V2", "V3", "V4")]

write.table(phenofix, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/fenofix.txt", col.names = F, row.names = F, quote = F)
9.3.6.1.2 Genotype files

I used this command line bellow to remove the individuals with MEDIUM phenotypes.

awk 'NR==FNR{ids[$1]; next} $1 in ids' fenofix.txt bruno_gntps_iid > bruno_gntps_iid_71

9.3.6.2 Running renum_QC.par

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/renum_QC.par

DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT

WEIGHT(S)

RESIDUAL_VARIANCE
77182
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid_71
PED_DEPTH
0
(CO)VARIANCES
28212
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
We modified renf90.par in 3 copies:
  • renf90_DataClean.par
  • renf90_ssGWAS1_Ginv.par
  • renf90_ssGWAS2_SNPeff.par

9.3.6.3 Running renf90_DataClean.par for Quality Control

To run the parameter card bellow we are going to use the software presGSf90 /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/renf90_DataClean.par

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31

9.3.6.4 Running renf90_ssGWAS1_Ginv.par for Ginv estimation

May be necessary to run the command bellow on the server Setting the stack size to “unlimited” allows the program to allocate memory for these large structures without hitting stack limits. By removing stack size limits, BLUPF90 is less likely to encounter segmentation faults or memory allocation issues that arise when the stack space is insufficient for the computations needed.

ulimit -s unlimited

The parameter card bellow we are going to run using the software blupf90+:

/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/renf90_ssGWAS1_Ginv.par

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION no_quality_control
OPTION origID
OPTION missing 9999
OPTION saveGInverse
OPTION saveA22Inverse
OPTION snp_p_value

9.3.6.5 Running renf90_ssGWAS2_SNPeff.par for GWAS

The parameter card bellow we are going to run using the software postGSf90:

# BLUPF90 parameter file created by RENUMF90
DATAFILE
 renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot 
This code will generate several files, among them:
  • chrsnp_pval:
    • Column 1: trait
    • Column 2: effect
    • Column 3: -log10(p-value)
    • Column 4: SNP
    • Column 5: Chromosome
    • Column 6: Position in bp
  • Pft1e3.R: a r-code to generate the Manhattan plot in R using the chrsnp_pval

9.3.6.6 Manhattan Plots for BLUPF90 GWAS

9.3.6.6.1 Genome Independent Segment

To make the Manhattan Plot considering Genome Independent Segment we should run the code bellow. This code has part of the code in the file Pft1e3.R

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)


setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO")
# Read in and process data for Manhattan plot
yyy1 <- read.table("chrsnp_pval")
yyy <- yyy1[order(yyy1$V4), ]
zzz <- yyy[which(yyy$V1 == 1 & yyy$V2 == 3), ]
n <- nrow(zzz)
y <- zzz[, 4]
x <- zzz[, 3]
chr1 <- zzz[, 5]
chr <- NULL
pos <- NULL

for (i in unique(yyy$V5)) {
  zz <- yyy[yyy$V5 == i, ]
  key <- zz$V4
  medio <- round(nrow(zz) / 2, 0)
  z <- key[medio]
  pos <- c(pos, z)
}

chrn <- unique(yyy$V5)
one <- which(chr1 %% 4 == 0)
two <- which(chr1 %% 4 == 1)
three <- which(chr1 %% 4 == 2)
four <- which(chr1 %% 4 == 3)
chr[one] <- "darkgoldenrod"
chr[two] <- "darkorchid"
chr[three] <- "blue"
chr[four] <- "forestgreen"

# Create Manhattan plot with Bonferroni line and legend
pdf(file = "Pft1e3_manplot_with_bonf_ind_seg.pdf", family = "sans", height = 27.8, width = 50, pointsize = 20, bg = "white")
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2)
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP p_value - Trait: 1 Effect: 3", xlab = "", ylab = "-log10(p-value)", pch = 20, xlim = c(1, n), ylim = c(0, max(x)), col = chr)

# Add Bonferroni line
abline(h = bonf, col = "red", lwd = 2, lty = 2)

# Add legend for Bonferroni line
legend("topright", legend = "Bonferroni correction for genome independent segments", col = "red", lwd = 2, lty = 2, cex = 1)

axis(1, at = pos, labels = chrn)
dev.off()

My Image

9.3.6.7 Get rsID

For additional analysis like Variant Effect Prediction (VEP) we need the rsID, to get the rsID we use the software SNPChimp which requires SNP_names, but the BLUPF90 output have only the Chromosome and Position of the SNPs, so we are going to perfome these two steps to get one file with t he significant SNPs + SNP_name + rsID

9.3.6.7.1 Step 01 = Bring the SNP name to GWAS output

For this analysis we have to build this new sheet bringing SNP ID from snpmap.txt

gwas = read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval")
colnames(gwas) <- c("V1", "V2", "LOG_P", "SNP", "CHR", "BP")
gwas <- filter(gwas, LOG_P >= bonf)

snpmap <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/snpmap.txt", header = T)

# Filter snpmap to only include rows that match the CHR and BP values in out_genes
filtered_snpmap <- snpmap[snpmap$CHR %in% gwas$CHR & snpmap$POS %in% gwas$BP, ]

# Merge the filtered snpmap with out_genes
gwas_snpname <- merge(gwas, filtered_snpmap[, c("CHR", "POS", "SNP_ID")], 
                   by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), 
                   all.x = TRUE)

gwas_snpname <- gwas_snpname[,c("CHR", "BP", "LOG_P", "SNP_ID")]

write.csv(gwas_snpname, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval_SNPid_ind_seg_sig_BLUPF90.csv")
X CHR BP LOG_P SNP_ID
1 11 19779915 4.828190 Hapmap55558-rs29013980
2 12 17477322 4.417576 BTB-00488482
3 14 15929822 5.467439 ARS-BFGL-NGS-82859
4 15 33066384 5.041271 BTB-00594449
5 15 57930281 4.514377 ARS-BFGL-NGS-30515
6 17 46487068 4.547766 ARS-BFGL-NGS-12510
7 17 47884497 6.622354 ARS-BFGL-NGS-87412
8 17 48800954 5.268940 ARS-BFGL-NGS-112149
9 2 118820218 4.420962 Hapmap53065-rs29026778
10 2 41602429 4.497977 BTB-00096979
11 2 41670655 4.377284 Hapmap48777-BTA-47434
12 20 60365668 4.603817 BTB-01341053
13 20 65351653 4.891206 ARS-BFGL-NGS-91119
14 21 4055731 4.493938 ARS-BFGL-NGS-112210
15 23 44822126 4.316434 ARS-BFGL-NGS-115605
16 24 857728 4.458018 Hapmap47669-BTA-59022
17 28 20016672 5.130557 ARS-BFGL-NGS-116552
18 28 35807366 4.695875 ARS-BFGL-NGS-71077
19 29 33039863 4.933140 ARS-BFGL-NGS-41631
20 3 110866523 4.815943 ARS-BFGL-NGS-118207
21 3 111526170 4.760351 ARS-BFGL-NGS-74948
22 3 111730561 4.390707 BTB-01641394
23 3 111751663 4.390707 ARS-BFGL-NGS-37809
24 3 111772736 5.088738 ARS-BFGL-NGS-25298
25 4 103979600 5.591006 ARS-BFGL-NGS-110705
26 4 107787202 5.375628 ARS-BFGL-NGS-45265
27 4 109568557 4.709882 Hapmap48062-BTA-72409
28 4 110053134 5.245598 UA-IFASA-2147
29 4 24239851 4.641241 Hapmap60681-rs29013301
30 4 25933587 4.352451 Hapmap50554-BTA-107048
31 4 26511448 4.537662 Hapmap59011-rs29027498
32 4 27021431 5.045447 Hapmap59743-rs29017061
33 4 27094376 5.485063 BTB-00170785
34 4 50755462 5.015905 ARS-BFGL-NGS-12139
35 4 7873471 4.610767 Hapmap60503-rs29018741
36 4 95650788 4.825103 Hapmap58854-rs29023486
37 6 36184467 4.758110 Hapmap23854-BTC-062412
38 7 110306791 4.609691 BTB-01148543
9.3.6.7.2 Step 02 = Bring the rsID to the file with SNP name.

After get the rsID from SNPCHIMP I produced this table with the code bellow:

rsid <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/SNPchimp_result_1723415732_BLUPF90GWAS.tsv", header = T)

merged <- merge(rsid, gwas_snpname, by.x ="SNP_name", by.y ="SNP_ID")

colnames(merged)

merged <- merged[,c("SNP_name", "rs", "CHR", "BP", "LOG_P")]

colnames(merged) <- c("SNP_name", "rsID", "CHR", "BP", "LOG_P")

write.csv(merged, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/gwas_ind_seg_sig_SNPname_rsID.csv")
X SNP_name rsID CHR BP LOG_P
1 ARS-BFGL-NGS-110705 rs110079750 4 103979600 5.591006
2 ARS-BFGL-NGS-112149 rs109276211 17 48800954 5.268940
3 ARS-BFGL-NGS-112210 rs109631116 21 4055731 4.493938
4 ARS-BFGL-NGS-115605 rs42029843 23 44822126 4.316434
5 ARS-BFGL-NGS-116552 rs110428837 28 20016672 5.130557
6 ARS-BFGL-NGS-118207 rs110081798 3 110866523 4.815943
7 ARS-BFGL-NGS-12139 rs110160157 4 50755462 5.015905
8 ARS-BFGL-NGS-12510 rs110038841 17 46487068 4.547766
9 ARS-BFGL-NGS-25298 rs109868537 3 111772736 5.088738
10 ARS-BFGL-NGS-30515 rs110565206 15 57930281 4.514377
11 ARS-BFGL-NGS-37809 rs42751504 3 111751663 4.390707
12 ARS-BFGL-NGS-41631 rs110121846 29 33039863 4.933140
13 ARS-BFGL-NGS-45265 rs110935391 4 107787202 5.375628
14 ARS-BFGL-NGS-71077 rs109584097 28 35807366 4.695875
15 ARS-BFGL-NGS-74948 rs41585925 3 111526170 4.760351
16 ARS-BFGL-NGS-82859 rs110506037 14 15929822 5.467439
17 ARS-BFGL-NGS-87412 rs109273103 17 47884497 6.622354
18 ARS-BFGL-NGS-91119 rs109575643 20 65351653 4.891206
19 BTB-00096979 rs43305418 2 41602429 4.497977
20 BTB-00170785 rs43377276 4 27094376 5.485063
21 BTB-00488482 rs43691687 12 17477322 4.417576
22 BTB-00594449 rs41764450 15 33066384 5.041271
23 BTB-01148543 rs42305073 7 110306791 4.609691
24 BTB-01341053 rs42462935 20 60365668 4.603817
25 BTB-01641394 rs42752353 3 111730561 4.390707
26 Hapmap23854-BTC-062412 rs81154019 6 36184467 4.758110
27 Hapmap47669-BTA-59022 rs41645754 24 857728 4.458018
28 Hapmap48062-BTA-72409 rs41566051 4 109568557 4.709882
29 Hapmap48777-BTA-47434 rs41636137 2 41670655 4.377284
30 Hapmap50554-BTA-107048 rs41615935 4 25933587 4.352451
31 Hapmap53065-rs29026778 rs29026778 2 118820218 4.420962
32 Hapmap55558-rs29013980 rs29013980 11 19779915 4.828190
33 Hapmap58854-rs29023486 rs29023486 4 95650788 4.825103
34 Hapmap59011-rs29027498 rs29027498 4 26511448 4.537662
35 Hapmap59743-rs29017061 rs29017061 4 27021431 5.045447
36 Hapmap60503-rs29018741 rs29018741 4 7873471 4.610767
37 Hapmap60681-rs29013301 rs29013301 4 24239851 4.641241
38 UA-IFASA-2147 rs29012492 4 110053134 5.245598

9.3.7 BLUPF90+ GALLO

Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)

# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8

# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)

NeL <- Ne*L_M

# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)


# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)


# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas = read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval")
colnames(gwas) <- c("V1", "V2", "LOG_P", "SNP", "CHR", "BP")
gwas <- filter(gwas, LOG_P >= bonf)


#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_genes_50k.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_genes_50k.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_qtl_50k.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_qtl_50k_clean.csv")

The GALLO output are bellow:

For GENES

X V1 V2 LOG_P SNP CHR BP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 1 3 4.420962 4632 2 118820218 2 118803760 118824701 20942 + ENSBTAG00000030718 SPATA3 protein_coding
2 1 3 4.420962 4632 2 118820218 2 118831062 118836777 5716 + ENSBTAG00000054626 NA lncRNA
3 1 3 4.420962 4632 2 118820218 2 118846915 118854132 7218 + ENSBTAG00000026710 C2H2orf72 protein_coding
4 1 3 4.420962 4632 2 118820218 2 118865174 118946608 81435 + ENSBTAG00000005119 PSMD1 protein_coding
5 1 3 4.377284 3498 2 41670655 2 41676135 42259624 583490 - ENSBTAG00000005562 GALNT13 protein_coding
6 1 3 4.815943 6862 3 110866523 3 110769753 110827938 58186 + ENSBTAG00000013867 DLGAP3 protein_coding
7 1 3 4.815943 6862 3 110866523 3 110835276 110839536 4261 + ENSBTAG00000014235 SMIM12 protein_coding
8 1 3 4.815943 6862 3 110866523 3 110888198 110890904 2707 - ENSBTAG00000013881 GJA4 protein_coding
9 1 3 4.815943 6862 3 110866523 3 110900183 110905849 5667 - ENSBTAG00000012584 GJB3 protein_coding
10 1 3 4.760351 6873 3 111526170 3 111552563 111568286 15724 - ENSBTAG00000020798 C3H1orf94 protein_coding
11 1 3 4.390707 6880 3 111730561 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
12 1 3 4.390707 6881 3 111751663 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
13 1 3 5.088738 6882 3 111772736 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
14 1 3 4.537662 7515 4 26511448 4 26324012 26469689 145678 - ENSBTAG00000014074 SNX13 protein_coding
15 1 3 5.485063 7526 4 27094376 4 27093041 27667961 574921 + ENSBTAG00000003808 HDAC9 protein_coding
16 1 3 4.825103 8704 4 95650788 4 95631850 95631921 72 - ENSBTAG00000045386 bta-mir-320b miRNA
17 1 3 5.591006 8831 4 103979600 4 103877533 103947944 70412 - ENSBTAG00000004799 DENND2A protein_coding
18 1 3 5.591006 8831 4 103979600 4 103994401 104008963 14563 + ENSBTAG00000020484 ADCK2 protein_coding
19 1 3 5.591006 8831 4 103979600 4 104013524 104019157 5634 + ENSBTAG00000021759 NDUFB2 protein_coding
20 1 3 5.591006 8831 4 103979600 4 104027627 104194547 166921 - ENSBTAG00000021761 BRAF protein_coding
21 1 3 5.375628 8888 4 107787202 4 107701708 108020989 319282 + ENSBTAG00000054424 NA protein_coding
22 1 3 5.375628 8888 4 107787202 4 107718449 108101163 382715 - ENSBTAG00000017676 TPK1 protein_coding
23 1 3 4.709882 8904 4 109568557 4 109526182 109526350 169 - ENSBTAG00000053666 NA misc_RNA
24 1 3 4.610767 7184 4 7873471 4 7867490 7867588 99 - ENSBTAG00000045389 U6 snRNA
25 1 3 5.015905 7910 4 50755462 4 50743789 50957593 213805 - ENSBTAG00000006589 CFTR protein_coding
26 1 3 4.758110 11511 6 36184467 6 36052150 36198478 146329 - ENSBTAG00000010120 HERC3 protein_coding
27 1 3 4.609691 14772 7 110306791 7 110296879 110565532 268654 + ENSBTAG00000035662 CAMK4 protein_coding
28 1 3 4.828190 20558 11 19779915 11 19680235 19765027 84793 - ENSBTAG00000001114 PRKD3 protein_coding
29 1 3 4.828190 20558 11 19779915 11 19792170 19822758 30589 + ENSBTAG00000013923 QPCT protein_coding
30 1 3 5.467439 25167 14 15929822 14 15865228 15995482 130255 - ENSBTAG00000013537 FER1L6 protein_coding
31 1 3 5.467439 25167 14 15929822 14 15914441 15917042 2602 + ENSBTAG00000048373 NA protein_coding
32 1 3 5.041271 26782 15 33066384 15 32820702 33050885 230184 - ENSBTAG00000054820 NA lncRNA
33 1 3 4.547766 29742 17 46487068 17 46406767 46715519 308753 + ENSBTAG00000009797 RIMBP2 protein_coding
34 1 3 6.622354 29772 17 47884497 17 47426664 48084896 658233 + ENSBTAG00000002950 TMEM132D protein_coding
35 1 3 5.268940 29784 17 48800954 17 48417339 48868560 451222 - ENSBTAG00000046256 TMEM132C protein_coding
36 1 3 4.891206 33665 20 65351653 20 65311530 65343117 31588 - ENSBTAG00000009401 MTRR protein_coding
37 1 3 4.891206 33665 20 65351653 20 65343172 65354775 11604 + ENSBTAG00000009400 FASTKD3 protein_coding
38 1 3 4.891206 33665 20 65351653 20 65368557 65384042 15486 + ENSBTAG00000055034 CFAP90 protein_coding
39 1 3 4.891206 33665 20 65351653 20 65388705 65842986 454282 - ENSBTAG00000019210 ADCY2 protein_coding
40 1 3 4.493938 33836 21 4055731 21 3866347 4146441 280095 - ENSBTAG00000013422 GABRB3 protein_coding
41 1 3 4.316434 36688 23 44822126 23 44802116 44835671 33556 - ENSBTAG00000010256 TMEM170B protein_coding
42 1 3 4.458018 36876 24 857728 24 779627 866891 87265 - ENSBTAG00000000656 NFATC1 protein_coding
43 1 3 4.458018 36876 24 857728 24 879074 1029386 150313 - ENSBTAG00000001224 ATP9B protein_coding
44 1 3 4.695875 41057 28 35807366 28 35805434 35823873 18440 + ENSBTAG00000021416 PRXL2A protein_coding
45 1 3 4.695875 41057 28 35807366 28 35848163 35904370 56208 + ENSBTAG00000003907 TSPAN14 protein_coding
46 1 3 4.933140 41785 29 33039863 29 32993975 33024632 30658 - ENSBTAG00000003176 JAM3 protein_coding
47 1 3 4.933140 41785 29 33039863 29 33036823 33044208 7386 - ENSBTAG00000052082 NA lncRNA

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 2 3497 41602429 Health 41584083 41584087 32371
2 2 3497 41602429 Milk 41646649 41646653 113699
3 2 3498 41670655 Milk 41646649 41646653 113699
4 2 3498 41670655 Milk 41668170 41668174 113700
5 2 3498 41670655 Meat_and_Carcass 41676143 41676147 154062
6 2 3498 41670655 Reproduction 41693862 41693866 29899
7 2 4632 118820218 Milk 118779183 118779187 200902
8 2 4632 118820218 Milk 118779985 118779989 202500
9 2 4632 118820218 Milk 118781186 118781190 202905
10 2 4632 118820218 Milk 118785252 118785256 201638
11 2 4632 118820218 Milk 118789209 118789213 202885
12 2 4632 118820218 Reproduction 118820216 118820220 39599
13 2 4632 118820218 Reproduction 118820216 118820220 39600
14 2 4632 118820218 Reproduction 118820216 118820220 39601
15 2 4632 118820218 Exterior 118820216 118820220 39602
16 2 4632 118820218 Exterior 118820216 118820220 39603
17 2 4632 118820218 Milk 118820216 118820220 39604
18 2 4632 118820218 Production 118820216 118820220 39605
19 2 4632 118820218 Exterior 118820216 118820220 39606
20 2 4632 118820218 Milk 118820216 118820220 39607
21 2 4632 118820218 Production 118820216 118820220 39608
22 2 4632 118820218 Production 118820216 118820220 39609
23 2 4632 118820218 Exterior 118820216 118820220 39610
24 2 4632 118820218 Exterior 118820216 118820220 39611
25 2 4632 118820218 Production 118820216 118820220 39612
26 2 4632 118820218 Reproduction 118820216 118820220 39613
27 2 4632 118820218 Health 118820216 118820220 39614
28 2 4632 118820218 Exterior 118820216 118820220 39615
29 2 4632 118820218 Exterior 118820216 118820220 39616
30 2 4632 118820218 Reproduction 118820216 118820220 125235
31 3 6862 110866523 Meat_and_Carcass 110866521 110866525 152332
32 3 6873 111526170 Health 111487002 111487006 137202
33 3 6873 111526170 Health 111487002 111487006 170274
34 3 6873 111526170 Health 111509252 111509256 170275
35 3 6873 111526170 Health 111515878 111515882 170276
36 3 6880 111730561 Reproduction 111708234 111708238 30008
37 3 6881 111751663 Reproduction 111708234 111708238 30008
38 3 6880 111730561 Reproduction 111708234 111708238 30244
39 3 6881 111751663 Reproduction 111708234 111708238 30244
40 3 6880 111730561 Meat_and_Carcass 111708234 111708238 152258
41 3 6881 111751663 Meat_and_Carcass 111708234 111708238 152258
42 3 6880 111730561 Meat_and_Carcass 111717521 111717525 225527
43 3 6881 111751663 Meat_and_Carcass 111717521 111717525 225527
44 4 7184 7873471 Milk 7839854 7839858 118382
45 4 7184 7873471 Milk 7851689 7851693 118383
46 4 7184 7873471 Milk 7878682 7878686 113676
47 4 7184 7873471 Milk 7878682 7878686 118072
48 4 7471 24239851 Meat_and_Carcass 24248260 24248264 228128
49 4 7471 24239851 Reproduction 24279008 24279012 212516
50 4 7471 24239851 Meat_and_Carcass 24288010 24288014 107768
51 4 7471 24239851 Meat_and_Carcass 24288010 24288014 107796
52 4 7515 26511448 Exterior 26511446 26511450 66124
53 4 7524 27021431 Production 27014514 27014518 65173
54 4 7910 50755462 Meat_and_Carcass 50781007 50781011 228112
55 4 8831 103979600 Health 103949847 103949851 96218
56 4 8831 103979600 Meat_and_Carcass 104013536 104013540 231600
57 4 8888 107787202 Reproduction 107749162 107749166 106782
58 6 11511 36184467 Milk 36141496 36141500 244115
59 6 11511 36184467 Milk 36141496 36141500 246435
60 6 11511 36184467 Milk 36141496 36141500 250700
61 6 11511 36184467 Production 36143407 36143411 190189
62 6 11511 36184467 Production 36143929 36143933 187391
63 6 11511 36184467 Production 36145023 36145027 183480
64 6 11511 36184467 Production 36145023 36145027 190272
65 6 11511 36184467 Production 36145254 36145258 182513
66 6 11511 36184467 Production 36145254 36145258 186692
67 6 11511 36184467 Production 36147416 36147420 187702
68 6 11511 36184467 Production 36147705 36147709 186912
69 6 11511 36184467 Production 36148373 36148377 182603
70 6 11511 36184467 Production 36148373 36148377 188134
71 6 11511 36184467 Production 36149039 36149043 186551
72 6 11511 36184467 Production 36150874 36150878 183765
73 6 11511 36184467 Production 36150874 36150878 186794
74 6 11511 36184467 Production 36151323 36151327 183528
75 6 11511 36184467 Production 36151323 36151327 187598
76 6 11511 36184467 Production 36153011 36153015 188352
77 6 11511 36184467 Production 36154453 36154457 188950
78 6 11511 36184467 Production 36156604 36156608 187155
79 6 11511 36184467 Production 36156892 36156896 183350
80 6 11511 36184467 Production 36156892 36156896 187196
81 6 11511 36184467 Production 36157620 36157624 23700
82 6 11511 36184467 Production 36157620 36157624 66352
83 6 11511 36184467 Production 36157620 36157624 66353
84 6 11511 36184467 Production 36157620 36157624 66354
85 6 11511 36184467 Production 36157620 36157624 66355
86 6 11511 36184467 Production 36157620 36157624 67222
87 6 11511 36184467 Production 36157620 36157624 67223
88 6 11511 36184467 Production 36157620 36157624 164068
89 6 11511 36184467 Production 36157620 36157624 164167
90 6 11511 36184467 Meat_and_Carcass 36157620 36157624 164343
91 6 11511 36184467 Production 36157620 36157624 164369
92 6 11511 36184467 Production 36157620 36157624 164488
93 6 11511 36184467 Production 36157620 36157624 183351
94 6 11511 36184467 Production 36157620 36157624 186777
95 6 11511 36184467 Production 36157986 36157990 187308
96 6 11511 36184467 Production 36158359 36158363 187062
97 6 11511 36184467 Production 36158498 36158502 186952
98 6 11511 36184467 Production 36159606 36159610 183352
99 6 11511 36184467 Production 36159606 36159610 187046
100 6 11511 36184467 Production 36160694 36160698 187096
101 6 11511 36184467 Production 36161273 36161277 187575
102 6 11511 36184467 Production 36161375 36161379 186498
103 6 11511 36184467 Production 36161625 36161629 187580
104 6 11511 36184467 Production 36161633 36161637 187885
105 6 11511 36184467 Production 36162950 36162954 187283
106 6 11511 36184467 Production 36163340 36163344 187023
107 6 11511 36184467 Production 36163428 36163432 186547
108 6 11511 36184467 Reproduction 36163939 36163943 14703
109 6 11511 36184467 Reproduction 36163939 36163943 14707
110 6 11511 36184467 Production 36163939 36163943 189833
111 6 11511 36184467 Production 36164020 36164024 189834
112 6 11511 36184467 Production 36164564 36164568 187124
113 6 11511 36184467 Production 36165001 36165005 186965
114 6 11511 36184467 Production 36165137 36165141 187629
115 6 11511 36184467 Production 36165150 36165154 188219
116 6 11511 36184467 Production 36165218 36165222 186478
117 6 11511 36184467 Production 36165305 36165309 187300
118 6 11511 36184467 Production 36165645 36165649 186485
119 6 11511 36184467 Production 36166108 36166112 187795
120 6 11511 36184467 Production 36166178 36166182 187257
121 6 11511 36184467 Production 36166205 36166209 187129
122 6 11511 36184467 Production 36166298 36166302 187368
123 6 11511 36184467 Reproduction 36166503 36166507 14704
124 6 11511 36184467 Milk 36166503 36166507 105821
125 6 11511 36184467 Production 36166503 36166507 189279
126 6 11511 36184467 Production 36166791 36166795 189278
127 6 11511 36184467 Production 36166795 36166799 182735
128 6 11511 36184467 Production 36166795 36166799 189277
129 6 11511 36184467 Production 36167325 36167329 183190
130 6 11511 36184467 Production 36167325 36167329 186569
131 6 11511 36184467 Production 36167828 36167832 183191
132 6 11511 36184467 Production 36167828 36167832 188015
133 6 11511 36184467 Production 36168124 36168128 183305
134 6 11511 36184467 Production 36168124 36168128 186440
135 6 11511 36184467 Production 36168592 36168596 182749
136 6 11511 36184467 Production 36168592 36168596 186494
137 6 11511 36184467 Production 36168720 36168724 182760
138 6 11511 36184467 Production 36168720 36168724 186560
139 6 11511 36184467 Production 36169712 36169716 187059
140 6 11511 36184467 Production 36169792 36169796 186511
141 6 11511 36184467 Milk 36170113 36170117 140218
142 6 11511 36184467 Production 36170133 36170137 186638
143 6 11511 36184467 Production 36170199 36170203 187467
144 6 11511 36184467 Production 36171684 36171688 188463
145 6 11511 36184467 Production 36172370 36172374 187237
146 6 11511 36184467 Milk 36172376 36172380 246431
147 6 11511 36184467 Production 36177199 36177203 183152
148 6 11511 36184467 Production 36177199 36177203 188011
149 6 11511 36184467 Production 36180643 36180647 187727
150 6 11511 36184467 Production 36181019 36181023 187465
151 6 11511 36184467 Production 36182792 36182796 187002
152 6 11511 36184467 Production 36183911 36183915 186509
153 6 11511 36184467 Milk 36185332 36185336 250783
154 6 11511 36184467 Milk 36186103 36186107 103888
155 6 11511 36184467 Milk 36186103 36186107 104752
156 6 11511 36184467 Production 36186581 36186585 188641
157 6 11511 36184467 Production 36186688 36186692 189503
158 6 11511 36184467 Production 36187039 36187043 188875
159 6 11511 36184467 Production 36189076 36189080 188768
160 6 11511 36184467 Production 36191254 36191258 187381
161 6 11511 36184467 Production 36198700 36198704 186480
162 6 11511 36184467 Production 36203596 36203600 187794
163 6 11511 36184467 Milk 36204034 36204038 138157
164 6 11511 36184467 Production 36204034 36204038 186572
165 6 11511 36184467 Milk 36205214 36205218 15002
166 6 11511 36184467 Production 36206851 36206855 188861
167 6 11511 36184467 Production 36210548 36210552 187623
168 6 11511 36184467 Production 36211595 36211599 187645
169 6 11511 36184467 Production 36212184 36212188 189171
170 6 11511 36184467 Production 36212912 36212916 187151
171 6 11511 36184467 Production 36216422 36216426 187483
172 6 11511 36184467 Milk 36218000 36218004 103890
173 6 11511 36184467 Milk 36218000 36218004 104754
174 6 11511 36184467 Milk 36219842 36219846 105620
175 6 11511 36184467 Production 36219842 36219846 186455
176 6 11511 36184467 Production 36221082 36221086 187299
177 6 11511 36184467 Production 36221438 36221442 187378
178 6 11511 36184467 Production 36225616 36225620 186759
179 6 11511 36184467 Production 36226441 36226445 186605
180 6 11511 36184467 Production 36226963 36226967 23701
181 6 11511 36184467 Production 36226963 36226967 67245
182 6 11511 36184467 Production 36226963 36226967 67246
183 6 11511 36184467 Production 36226963 36226967 67247
184 6 11511 36184467 Milk 36226963 36226967 105509
185 6 11511 36184467 Production 36226963 36226967 164069
186 6 11511 36184467 Production 36226963 36226967 164370
187 6 11511 36184467 Production 36226963 36226967 186841
188 6 11511 36184467 Production 36227839 36227843 182676
189 6 11511 36184467 Production 36227839 36227843 188422
190 6 11511 36184467 Production 36232422 36232426 187967
191 6 11511 36184467 Production 36232742 36232746 187367
192 6 11511 36184467 Production 36233671 36233675 188839
193 14 25167 15929822 Milk 15895119 15895123 25606
194 14 25167 15929822 Milk 15895119 15895123 26355
195 14 25167 15929822 Meat_and_Carcass 15895119 15895123 36989
196 14 25167 15929822 Milk 15904094 15904098 104606
197 14 25167 15929822 Milk 15905538 15905542 104607
198 14 25167 15929822 Exterior 15929820 15929824 220989
199 14 25167 15929822 Production 15964283 15964287 122755
200 14 25167 15929822 Production 15964283 15964287 123539
201 14 25167 15929822 Production 15965017 15965021 122756
202 14 25167 15929822 Production 15965017 15965021 123540
203 14 25167 15929822 Production 15967515 15967519 122757
204 14 25167 15929822 Production 15967515 15967519 123541
205 14 25167 15929822 Production 15969384 15969388 122758
206 14 25167 15929822 Production 15969384 15969388 123542
207 14 25167 15929822 Production 15972216 15972220 122759
208 14 25167 15929822 Production 15972216 15972220 123543
209 14 25167 15929822 Production 15977822 15977826 122760
210 14 25167 15929822 Production 15977822 15977826 123544
211 15 26782 33066384 Production 33066382 33066386 68791
212 15 26782 33066384 Meat_and_Carcass 33107568 33107572 228743
213 15 27199 57930281 Milk 57892792 57892796 242339
214 15 27199 57930281 Milk 57914040 57914044 248429
215 15 27199 57930281 Milk 57926776 57926780 242396
216 15 27199 57930281 Milk 57926776 57926780 248428
217 15 27199 57930281 Milk 57949776 57949780 242391
218 17 29742 46487068 Milk 46449873 46449877 162016
219 17 29742 46487068 Milk 46449873 46449877 163641
220 17 29742 46487068 Milk 46452018 46452022 162017
221 17 29742 46487068 Milk 46452018 46452022 163642
222 17 29742 46487068 Milk 46455275 46455279 163230
223 17 29742 46487068 Milk 46455275 46455279 163643
224 17 29742 46487068 Milk 46457135 46457139 163231
225 17 29742 46487068 Milk 46457135 46457139 163644
226 17 29742 46487068 Milk 46460742 46460746 163159
227 17 29742 46487068 Milk 46460742 46460746 163583
228 17 29742 46487068 Milk 46463937 46463941 163232
229 17 29742 46487068 Milk 46463937 46463941 163645
230 17 29742 46487068 Milk 46474328 46474332 162015
231 17 29742 46487068 Milk 46474328 46474332 163640
232 17 29742 46487068 Milk 46474997 46475001 162014
233 17 29742 46487068 Milk 46474997 46475001 163639
234 17 29742 46487068 Milk 46476243 46476247 162013
235 17 29742 46487068 Milk 46476243 46476247 163638
236 17 29742 46487068 Milk 46476816 46476820 162012
237 17 29742 46487068 Milk 46476816 46476820 163637
238 17 29742 46487068 Milk 46480499 46480503 161948
239 17 29742 46487068 Milk 46480499 46480503 162037
240 17 29742 46487068 Milk 46481110 46481114 161941
241 17 29742 46487068 Milk 46481110 46481114 162030
242 17 29742 46487068 Milk 46481709 46481713 162019
243 17 29742 46487068 Milk 46481709 46481713 162100
244 17 29742 46487068 Milk 46482674 46482678 162020
245 17 29742 46487068 Milk 46482674 46482678 162101
246 17 29742 46487068 Milk 46483484 46483488 162021
247 17 29742 46487068 Milk 46483484 46483488 162102
248 17 29742 46487068 Milk 46529284 46529288 163162
249 17 29742 46487068 Milk 46529284 46529288 163584
250 17 29772 47884497 Milk 47837023 47837027 162764
251 17 29772 47884497 Milk 47844960 47844964 162876
252 17 29772 47884497 Milk 47851806 47851810 162921
253 17 29772 47884497 Milk 47855252 47855256 162648
254 17 29772 47884497 Production 47856400 47856404 23869
255 17 29772 47884497 Production 47856400 47856404 68904
256 17 29772 47884497 Exterior 47856400 47856404 102064
257 17 29772 47884497 Exterior 47884495 47884499 102065
258 17 29772 47884497 Milk 47924476 47924480 163030
259 17 29772 47884497 Milk 47924476 47924480 163517
260 17 29772 47884497 Milk 47926919 47926923 162858
261 17 29772 47884497 Milk 47926919 47926923 163422
262 17 29784 48800954 Milk 48759668 48759672 63966
263 20 33665 65351653 Milk 65321359 65321363 176120
264 20 33665 65351653 Milk 65350145 65350149 69962
265 20 33665 65351653 Milk 65352215 65352219 69963
266 20 33665 65351653 Milk 65357974 65357978 69964
267 20 33665 65351653 Milk 65363596 65363600 69626
268 20 33665 65351653 Health 65367232 65367236 167647
269 21 33836 4055731 Reproduction 4074797 4074801 145524
270 21 33836 4055731 Reproduction 4079204 4079208 145458
271 21 33836 4055731 Reproduction 4079247 4079251 145445
272 21 33836 4055731 Reproduction 4085667 4085671 145389
273 21 33836 4055731 Reproduction 4085667 4085671 146710
274 21 33836 4055731 Reproduction 4085817 4085821 146703
275 23 36688 44822126 Milk 44787659 44787663 254748
276 23 36688 44822126 Milk 44789048 44789052 254706
277 23 36688 44822126 Meat_and_Carcass 44807487 44807491 226965
278 23 36688 44822126 Meat_and_Carcass 44807487 44807491 229458
279 23 36688 44822126 Meat_and_Carcass 44807487 44807491 234754
280 23 36688 44822126 Milk 44808987 44808991 254767
281 23 36688 44822126 Milk 44830780 44830784 254833
282 23 36688 44822126 Milk 44834127 44834131 254725
283 23 36688 44822126 Milk 44845256 44845260 254688
284 23 36688 44822126 Milk 44848146 44848150 254689
285 23 36688 44822126 Milk 44859653 44859657 254769
286 24 36876 857728 Exterior 857726 857730 66121
287 24 36876 857728 Reproduction 885598 885602 106889
288 28 40795 20016672 Milk 20016670 20016674 112226
289 28 40795 20016672 Milk 20016670 20016674 118869
290 28 40795 20016672 Milk 20017763 20017767 112227
291 28 40795 20016672 Milk 20017763 20017767 118870
292 28 40795 20016672 Milk 20045011 20045015 118664
293 28 40795 20016672 Milk 20060184 20060188 118665
294 28 41057 35807366 Reproduction 35807364 35807368 138601
295 28 41057 35807366 Reproduction 35807364 35807368 138602
296 29 41785 33039863 Production 33039861 33039865 69450
297 29 41785 33039863 Production 33039861 33039865 69451

QTL type

#plotting the percentage of each QTL class annoatted
oldpar <- par(mar=c(1,15,0.5,1))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1.5)

My Image

Production QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Production")

My Image

Reproduction QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")

My Image

Milk QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Milk")

My Image

Meat and Carcass QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class=""Meat_and_Carcass"")

My Image

Health QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")

My Image

Exterior QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Exterior")

My Image

9.3.7.1 QTL enrichment on GALLO

#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_enrich_qtl_genome_name.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_enrich_qtl_genome_type.csv")


#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
27 Metabolic body weight 83 4039 292 163224 0.0000000 0.0000000 Production
29 Milk butyric acid content 22 828 292 163224 0.0000000 0.0000000 Milk
30 Milk caproic acid content 18 675 292 163224 0.0000000 0.0000000 Milk
12 Dry matter intake 17 2186 292 163224 0.0000006 0.0000090 Production
2 Aggressive behavior 2 8 292 163224 0.0000887 0.0008719 Exterior
25 Length of productive life 13 2004 292 163224 0.0000815 0.0008719 Production
35 Milk pentadecylic acid content 5 280 292 163224 0.0001640 0.0013823 Milk
33 Milk iron content 4 217 292 163224 0.0006685 0.0049302 Milk
15 Dystocia 2 31 292 163224 0.0014331 0.0090411 Reproduction
16 Fecal larva count 3 125 292 163224 0.0015324 0.0090411 Health
4 Body weight 16 4289 292 163224 0.0050041 0.0268400 Production
14 Duration of inactivity during open field test 1 9 292 163224 0.0159862 0.0785990 Exterior
13 Duration of exploration during novel object test 1 16 292 163224 0.0282437 0.1281828 Exterior
5 Body weight gain 6 1354 292 163224 0.0362258 0.1526660 Production
3 Anti-Müllerian hormone level 1 26 292 163224 0.0454908 0.1789305 Health
46 Polyunsaturated fatty acid content 1 28 292 163224 0.0489034 0.1803314 Meat and Carcass
45 Palmitoleic acid content 1 46 292 163224 0.0790752 0.2744374 Meat and Carcass
42 Muscle sodium content 1 56 292 163224 0.0954231 0.3127758 Meat and Carcass
28 Milk alpha-S1-casein percentage 1 65 292 163224 0.1098888 0.3382449 Milk
43 Myristoleic acid content 1 68 292 163224 0.1146593 0.3382449 Meat and Carcass
39 Milk unglycosylated kappa-casein percentage 7 2351 292 163224 0.1315212 0.3695119 Milk
21 Interdigital hyperplasia 1 93 292 163224 0.1534352 0.4052354 Exterior
41 Multiple birth 1 96 292 163224 0.1579731 0.4052354 Reproduction
19 Gestation length 2 636 292 163224 0.3149170 0.7146195 Reproduction
23 Interval to first estrus after calving 3 1053 292 163224 0.2917668 0.7146195 Reproduction
36 Milk protein percentage 18 8803 292 163224 0.3139794 0.7146195 Milk
8 Calving ease 6 3819 292 163224 0.6804471 0.9778366 Reproduction
10 Conception rate 2 1255 292 163224 0.6577041 0.9778366 Reproduction
17 Feet and leg conformation 1 627 292 163224 0.6752950 0.9778366 Exterior
18 Foot angle 1 672 292 163224 0.7005282 0.9778366 Exterior
22 Interval from first to last insemination 1 445 292 163224 0.5497190 0.9778366 Reproduction
24 Lean meat yield 1 621 292 163224 0.6717742 0.9778366 Meat and Carcass
38 Milk riboflavin content 1 509 292 163224 0.5986072 0.9778366 Milk
48 PTA type 1 627 292 163224 0.6752950 0.9778366 Production
49 Rear leg placement - side view 1 430 292 163224 0.5374280 0.9778366 Exterior
50 Rump width 1 526 292 163224 0.6106790 0.9778366 Production
52 Somatic cell score 2 1122 292 163224 0.5971200 0.9778366 Health
53 Stillbirth 2 1363 292 163224 0.7013705 0.9778366 Reproduction
54 Strength 1 664 292 163224 0.6961897 0.9778366 Exterior
55 Subcutaneous fat thickness 1 331 292 163224 0.4474847 0.9778366 Meat and Carcass
57 Udder attachment 1 655 292 163224 0.6912339 0.9778366 Exterior
58 Udder depth 1 695 292 163224 0.7126605 0.9778366 Exterior
59 Udder height 1 504 292 163224 0.5949862 0.9778366 Exterior
6 Bovine respiratory disease susceptibility 1 789 292 163224 0.7573589 0.9935536 Health
20 Inseminations per conception 1 790 292 163224 0.7577951 0.9935536 Reproduction
1 Age at puberty 1 8222 292 163224 0.9999997 0.9999997 Reproduction
7 Bovine tuberculosis susceptibility 1 1155 292 163224 0.8744991 0.9999997 Health
9 Carcass weight 1 2020 292 163224 0.9737344 0.9999997 Meat and Carcass
11 Connective tissue amount 1 3142 292 163224 0.9965892 0.9999997 Meat and Carcass
26 Marbling score 1 1817 292 163224 0.9620577 0.9999997 Meat and Carcass
31 Milk fat percentage 9 10941 292 163224 0.9979006 0.9999997 Milk
32 Milk fat yield 2 8220 292 163224 0.9999954 0.9999997 Milk
34 Milk kappa-casein percentage 5 4499 292 163224 0.9064431 0.9999997 Milk
37 Milk protein yield 1 3093 292 163224 0.9962701 0.9999997 Milk
40 Milk yield 4 6432 292 163224 0.9970834 0.9999997 Milk
44 Net merit 1 903 292 163224 0.8023701 0.9999997 Production
47 Pregnancy rate 1 944 292 163224 0.8164356 0.9999997 Reproduction
51 Shear force 2 2954 292 163224 0.9692776 0.9999997 Meat and Carcass
56 Tenderness score 4 3483 292 163224 0.8712212 0.9999997 Meat and Carcass

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval
1 Exterior 12 9077 292 163224 0.8914062 1
2 Health 8 5889 292 163224 0.8293292 1
3 Meat and Carcass 15 18258 292 163224 0.9998999 1
4 Milk 97 75352 292 163224 0.9999974 1
5 Production 138 19640 292 163224 0.0000000 0
6 Reproduction 22 35008 292 163224 1.0000000 1

My Image

9.3.7.2 BLUPF90+ GPROFILER ON-LINE

From the online version of GPROFILER i got the following results.

Legend My Image

My Image

My Image

9.3.7.3 BLUPF90+ VEP

My Image

rs110079750 My Image

rs109276211 My Image

rs109631116 My Image

rs42029843 My Image

rs110428837 My Image

rs110081798 My Image

rs110160157 My Image

rs110038841 My Image

rs109868537 My Image

rs110565206 My Image

rs42751504 My Image

rs110121846 My Image

rs110935391 My Image

rs109584097 My Image

rs41585925 My Image

rs110506037 My Image

rs109273103 My Image

rs109575643 My Image

rs43305418 My Image

rs43377276 My Image

rs43691687 My Image

rs41764450 My Image

rs42305073 No

rs42462935 No

rs42752353 My Image

rs81154019 No

rs41645754 My Image

rs41566051 My Image

rs41636137 My Image

rs41615935 My Image

rs29026778 My Image

rs29013980 My Image

rs29023486 My Image

rs29027498 My Image

rs29017061 My Image

rs29018741 My Image

rs29013301 My Image

rs29012492 My Image

It is interesting that 3 significant SNPs falled in the same gene CSMD2 on chromosome 3 rs109868537 rs42751504 rs42752353

My Image

My Image

For the gene CNTNAP2 on chromosome 4 also have 2 significant SNPs rs41566051 rs29012492

My Image

9.4 BLUPF90+ WINDOWS

9.4.1 Running renf90_ssGWAS2_SNPeff_w.par for GWAS (WINDOWS)

The parameter card bellow we are going to run using the software postGSf90:

renf90_ssGWAS2_SNPeff_W_10.par

# BLUPF90 parameter file created by RENUMF90
DATAFILE
renf90.dat
NUMBER_OF_TRAITS
           1
NUMBER_OF_EFFECTS
           3
OBSERVATION(S)
    1
WEIGHT(S)
 
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
 2         4 cross 
 3        22 cross 
 4      3724 cross 
RANDOM_RESIDUAL VALUES
   77182.    
 RANDOM_GROUP
     3
 RANDOM_TYPE
 add_an_upginb
 FILE
renadd03.ped                                                                    
(CO)VARIANCES
   28212.    
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot
OPTION SNP_moving_average 10
OPTION windows_variance 10 
The parameter file above will generate the following files:
  • snp_sol
    • column 1: trait
    • column 2: effect
    • column 3: SNP
    • column 4: Chromosome
    • column 5: Position
    • column 6: SNP solution
    • column 7: weight
    • column 8: variance explained by n adjacents SNP (if OPTION windows_variance is used).
    • column 9: variance of the SNP solution (used to compute the p-value) (if OPTION snp_p_value is used)
  • snp_pred
    • contains allele frequencies + SNP effects
  • chrsnpvar
    • column 1: trait
    • column 2: effect
    • column 3: variance explained by n adjacents SNP
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • chrsnp_pval
    • column 1: trait
    • column 2: effect
    • column 3: -log10(p-value)
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position in bp
  • chrsnp
    • column 1: trait
    • column 2: effect
    • column 3: values of SNP effects to use in Manhattan plots → [abs(SNP_i)/SD(SNP)]
    • column 4: SNP
    • column 5: Chromosome
    • column 6: Position
  • windows_segments
    • column 1: label
    • column 2: window size (number of SNP)
    • column 3: Start SNP number for the window
    • column 4: End SNP number for the window
    • column 5: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
  • windows_variance
    • column 1: trait
    • column 2: effect
    • column 3: Start SNP number or SNP name for the window
    • column 4: End SNP number or SNP name for the window
    • column 5: window size (number of SNP)
    • column 6: Start (ChrNumber)’_’(Position) for the window
    • column 7: End (ChrNumber)’_’(Position) for the window
    • column 8: identification of window: (ChrNumber)’_’(startPositionMBP)
    • column 9: variance explained by n adjacents SNP
  • Vft1e3.R
  • Sft1e3.R
  • Pft1e3.R

Bellow we can see the SNPs that explain more than 0.5% of Genetic Variance

w_var <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/chrsnpvar", header = F)
w_var <- filter(w_var, V3 > 0.5)
colnames(w_var) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")
snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/snpmap.txt_clean", header = T)

# Fazer o merge baseado em duas condições: CHR e POS
merged_data <- merge(w_var, snp_map, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
w_var <- merged_data[,c("CHR", "BP", "Var", "SNP_ID")]


rsid <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/SNPchimp_result_3859303481.tsv", header = T)


merged <- merge(rsid, w_var, by.x ="SNP_name", by.y ="SNP_ID")

colnames(merged)

merged <- merged[,c("SNP_name", "rs", "CHR", "BP", "Var")]

colnames(merged) <- c("SNP_name", "rsID", "CHR", "BP", "Var")

write.csv(merged, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/w10_snp_rsid_snpvar_05.csv")

Bellow we can see the SNPs that explain more than 0.5% of Genetic Variance

X SNP_name rsID CHR BP Var
1 ARS-BFGL-BAC-28665 rs111010562 24 28487771 0.6899732
2 ARS-BFGL-BAC-35548 rs110100182 2 115665427 1.2851806
3 ARS-BFGL-BAC-7444 rs110491621 13 18558540 0.6630789
4 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.1689686
5 ARS-BFGL-NGS-107330 rs109766798 2 116018639 0.5958551
6 ARS-BFGL-NGS-25298 rs109868537 3 111772736 1.2101267
7 ARS-BFGL-NGS-2713 rs41761360 15 34054485 0.5549892
8 ARS-BFGL-NGS-30337 rs110485060 2 115730530 1.3488469
9 ARS-BFGL-NGS-3276 rs110634531 20 12087403 0.5722138
10 ARS-BFGL-NGS-37809 rs42751504 3 111751663 1.0607739
11 ARS-BFGL-NGS-43721 rs108974471 2 115986085 0.8178265
12 ARS-BFGL-NGS-44131 rs110100483 3 111806406 1.0252690
13 ARS-BFGL-NGS-5141 rs110705087 24 28417928 0.5945940
14 ARS-BFGL-NGS-5976 rs41763278 15 34144843 0.5490537
15 ARS-BFGL-NGS-6202 rs110385521 3 111833768 1.1671762
16 ARS-BFGL-NGS-78615 rs110959523 20 12111883 0.5689079
17 ARS-BFGL-NGS-85333 rs110742206 3 111933069 0.6077153
18 ARS-BFGL-NGS-97849 rs110553601 3 111965305 0.7547885
19 ARS-BFGL-NGS-98724 rs109709275 15 34109962 0.6252263
20 BTA-25900-no-rs rs41575397 13 18509812 0.6364128
21 BTA-49096-no-rs rs41578131 2 115695003 1.6071883
22 BTA-73915-no-rs rs41648979 5 6312610 0.5103844
23 BTA-91816-no-rs rs41596771 15 11100934 0.9739683
24 BTB-01421844 rs42544667 15 11144666 0.7131750
25 BTB-01421892 rs42544714 15 11185546 0.6463689
26 BTB-01421934 rs42545356 15 11207429 0.8898567
27 BTB-01422008 rs42545430 15 11236303 0.6832306
28 BTB-01434227 rs42557533 10 50554831 0.6660810
29 BTB-01485274 rs42609685 24 28540641 0.8983644
30 BTB-01608944 rs42723390 15 10974997 1.1206945
31 BTB-01641394 rs42752353 3 111730561 0.9480214
32 BTB-01646599 rs42761380 24 28604672 0.5894579
33 BTB-01813405 rs42924913 15 10879514 0.5556342
34 BTB-01830390 rs42938737 15 10936002 1.0480277
35 BTB-01948148 rs43056622 3 111677167 0.7467347
36 BTB-02063964 rs43172105 15 10906064 0.8788211
37 Hapmap41888-BTA-49091 rs41645223 2 115622067 0.8268392
38 Hapmap42062-BTA-109789 rs41621207 3 111708236 1.2415855
39 Hapmap49833-BTA-103929 rs41603335 13 18386942 0.5282867
40 Hapmap50266-BTA-13664 rs29018622 13 18266073 0.6006650
41 Hapmap54770-rs29009608 rs29009608 2 115875702 0.7398417
42 Hapmap54981-rs29019846 rs29019846 24 28516684 0.7747524
43 Hapmap58887-rs29013502 rs29013502 24 28570245 0.7619213

Bellow we can see the SNPs that explain more than 0.3% of Genetic Variance

w_var <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/chrsnpvar", header = F)
w_var <- filter(w_var, V3 > 0.3)
colnames(w_var) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")
snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/snpmap.txt_clean", header = T)

# Fazer o merge baseado em duas condições: CHR e POS
merged_data <- merge(w_var, snp_map, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
w_var <- merged_data[,c("CHR", "BP", "Var", "SNP_ID")]

# We do this to copy the names of SNPs and paste in SNPCHIMP website to get the .tsv file bellow
snp_ids <- paste(w_var$SNP_ID, collapse = ", ")
snp_ids
rsid <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/SNPchimp_result_1536968083.tsv", header = T)


merged <- merge(rsid, w_var, by.x ="SNP_name", by.y ="SNP_ID")

colnames(merged)

merged <- merged[,c("SNP_name", "rs", "CHR", "BP", "Var")]

colnames(merged) <- c("SNP_name", "rsID", "CHR", "BP", "Var")

write.csv(merged, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/w10_snp_rsid_snpvar_03.csv")
X SNP_name rsID CHR BP Var
1 ARS-BFGL-BAC-2191 rs42812364 15 10827424 0.3020577
2 ARS-BFGL-BAC-28665 rs111010562 24 28487771 0.6899732
3 ARS-BFGL-BAC-31736 rs42930372 20 12166596 0.3700647
4 ARS-BFGL-BAC-35548 rs110100182 2 115665427 1.2851806
5 ARS-BFGL-BAC-35732 rs109538801 22 36432552 0.3176352
6 ARS-BFGL-BAC-7444 rs110491621 13 18558540 0.6630789
7 ARS-BFGL-NGS-101461 rs110600398 18 27938774 0.3809178
8 ARS-BFGL-NGS-101698 rs110435062 11 96235663 0.3932618
9 ARS-BFGL-NGS-103753 rs110842922 2 115821065 1.1689686
10 ARS-BFGL-NGS-107330 rs109766798 2 116018639 0.5958551
11 ARS-BFGL-NGS-110683 rs110606737 3 112064027 0.3321464
12 ARS-BFGL-NGS-111085 rs41756499 15 34205212 0.4880066
13 ARS-BFGL-NGS-111563 rs110964572 11 85771451 0.3654883
14 ARS-BFGL-NGS-111672 rs109491825 11 96285921 0.3447816
15 ARS-BFGL-NGS-113265 rs111014992 10 16182204 0.4067019
16 ARS-BFGL-NGS-116109 rs41757840 15 34225443 0.4224669
17 ARS-BFGL-NGS-116274 rs43704412 13 18626070 0.4947973
18 ARS-BFGL-NGS-117553 rs41994531 1 130269306 0.3155001
19 ARS-BFGL-NGS-21967 rs109669065 13 18489670 0.4937232
20 ARS-BFGL-NGS-25298 rs109868537 3 111772736 1.2101267
21 ARS-BFGL-NGS-26086 rs109526941 15 34001596 0.3355725
22 ARS-BFGL-NGS-2713 rs41761360 15 34054485 0.5549892
23 ARS-BFGL-NGS-30337 rs110485060 2 115730530 1.3488469
24 ARS-BFGL-NGS-30447 rs109870524 13 18189413 0.4135777
25 ARS-BFGL-NGS-30583 rs110619943 3 105587654 0.3004602
26 ARS-BFGL-NGS-3276 rs110634531 20 12087403 0.5722138
27 ARS-BFGL-NGS-37809 rs42751504 3 111751663 1.0607739
28 ARS-BFGL-NGS-40301 rs110114574 11 85359517 0.3055185
29 ARS-BFGL-NGS-41140 rs42042561 24 28638911 0.4372706
30 ARS-BFGL-NGS-43211 rs110382564 12 11150596 0.3360266
31 ARS-BFGL-NGS-43490 rs110266870 11 96322228 0.3858722
32 ARS-BFGL-NGS-43721 rs108974471 2 115986085 0.8178265
33 ARS-BFGL-NGS-44131 rs110100483 3 111806406 1.0252690
34 ARS-BFGL-NGS-5141 rs110705087 24 28417928 0.5945940
35 ARS-BFGL-NGS-56582 rs43268059 1 130204834 0.3492380
36 ARS-BFGL-NGS-5722 rs110803000 10 16160207 0.3713583
37 ARS-BFGL-NGS-5976 rs41763278 15 34144843 0.5490537
38 ARS-BFGL-NGS-6202 rs110385521 3 111833768 1.1671762
39 ARS-BFGL-NGS-71345 rs109181087 3 111991216 0.4965733
40 ARS-BFGL-NGS-71657 rs110799261 4 7455891 0.3278440
41 ARS-BFGL-NGS-75576 rs41760513 15 34025604 0.4695660
42 ARS-BFGL-NGS-78615 rs110959523 20 12111883 0.5689079
43 ARS-BFGL-NGS-85333 rs110742206 3 111933069 0.6077153
44 ARS-BFGL-NGS-8836 rs110305082 10 50727713 0.3454069
45 ARS-BFGL-NGS-97849 rs110553601 3 111965305 0.7547885
46 ARS-BFGL-NGS-98724 rs109709275 15 34109962 0.6252263
47 ARS-USMARC-Parent-EF026086-rs29013660 rs29013660 28 35132199 0.3313140
48 BTA-118245-no-rs rs41664902 20 12228546 0.3054249
49 BTA-25900-no-rs rs41575397 13 18509812 0.6364128
50 BTA-31832-no-rs rs41631683 13 18676544 0.3995710
51 BTA-49096-no-rs rs41578131 2 115695003 1.6071883
52 BTA-53040-no-rs rs41637809 1 130363507 0.3226769
53 BTA-73915-no-rs rs41648979 5 6312610 0.5103844
54 BTA-91816-no-rs rs41596771 15 11100934 0.9739683
55 BTA-91820-no-rs rs41572742 15 11310260 0.4420277
56 BTA-92902-no-rs rs41662308 10 50532545 0.4997714
57 BTA-96609-no-rs rs41616196 2 75615637 0.3299858
58 BTA-96614-no-rs rs41662071 2 75692826 0.3503897
59 BTA-97262-no-rs rs41609631 24 28452709 0.4809272
60 BTB-00058404 rs43268082 1 130225374 0.3332314
61 BTB-00113625 rs43326515 2 115504029 0.3271297
62 BTB-00280987 rs43487459 6 105860748 0.3147472
63 BTB-00593773 rs41757877 15 34249244 0.3560422
64 BTB-00685072 rs41848383 4 7427468 0.3386708
65 BTB-01421844 rs42544667 15 11144666 0.7131750
66 BTB-01421892 rs42544714 15 11185546 0.6463689
67 BTB-01421934 rs42545356 15 11207429 0.8898567
68 BTB-01422008 rs42545430 15 11236303 0.6832306
69 BTB-01434144 rs42556851 10 50632188 0.3879825
70 BTB-01434227 rs42557533 10 50554831 0.6660810
71 BTB-01485274 rs42609685 24 28540641 0.8983644
72 BTB-01485885 rs42606196 24 28353262 0.3220210
73 BTB-01608944 rs42723390 15 10974997 1.1206945
74 BTB-01610675 rs42733108 5 6016429 0.3122917
75 BTB-01641394 rs42752353 3 111730561 0.9480214
76 BTB-01646599 rs42761380 24 28604672 0.5894579
77 BTB-01753605 rs42862169 2 55510237 0.3562713
78 BTB-01813405 rs42924913 15 10879514 0.5556342
79 BTB-01830390 rs42938737 15 10936002 1.0480277
80 BTB-01948148 rs43056622 3 111677167 0.7467347
81 BTB-02063964 rs43172105 15 10906064 0.8788211
82 Hapmap23257-BTA-123353 rs43710679 2 55394810 0.4080735
83 Hapmap33880-BES9_Contig535_933 rs43559508 2 55577762 0.3108049
84 Hapmap35049-BES5_Contig425_820 rs42536809 18 21429894 0.3313303
85 Hapmap35643-SCAFFOLD318144_4314 rs29017226 3 111651143 0.3867037
86 Hapmap38323-BTA-49132 rs41578141 2 116064090 0.3476804
87 Hapmap38876-BTA-109334 rs110195627 11 85937536 0.3037616
88 Hapmap40249-BTA-49086 rs41645218 2 115533946 0.4998173
89 Hapmap41845-BTA-35927 rs41581283 14 12970779 0.3373003
90 Hapmap41888-BTA-49091 rs41645223 2 115622067 0.8268392
91 Hapmap42062-BTA-109789 rs41621207 3 111708236 1.2415855
92 Hapmap42359-BTA-90829 rs41593704 18 21454669 0.3210384
93 Hapmap44603-BTA-51543 rs41667474 20 12136194 0.4286374
94 Hapmap46385-BTA-90833 rs41593707 18 21391728 0.4494537
95 Hapmap48508-BTA-88108 rs41663417 2 55441390 0.3278748
96 Hapmap49833-BTA-103929 rs41603335 13 18386942 0.5282867
97 Hapmap50173-BTA-96591 rs41662054 2 75132529 0.3371572
98 Hapmap50266-BTA-13664 rs29018622 13 18266073 0.6006650
99 Hapmap50870-BTA-111277 rs41619094 10 50375243 0.3896697
100 Hapmap51989-BTA-92903 rs41662309 10 50608000 0.4718108
101 Hapmap52341-rs29025776 rs29025776 20 12040873 0.4138388
102 Hapmap54762-rs29012389 rs29012389 10 50673845 0.3575426
103 Hapmap54770-rs29009608 rs29009608 2 115875702 0.7398417
104 Hapmap54981-rs29019846 rs29019846 24 28516684 0.7747524
105 Hapmap58887-rs29013502 rs29013502 24 28570245 0.7619213
106 Hapmap59616-ss46526976 rs41257422 5 6281168 0.3864947
107 UA-IFASA-5633 rs41646367 28 35155098 0.3037150
108 UA-IFASA-7696 rs41603184 14 12882729 0.3361687

STOP STOP 23 DECEMBER 2024

9.4.2 Windows - BLUPF90+ GALLO

w_var <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/chrsnpvar", header = F)
w_var <- filter(w_var, V3 > 0.5)
colnames(w_var) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")

snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/snpmap.txt_clean", header = T)

# Fazer o merge baseado em duas condições: CHR e POS
merged_data <- merge(w_var, snp_map, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
w_var <- merged_data[,c("CHR", "BP", "Var", "SNP_ID")]


# GALLO

#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")

#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")

#import MARKER files = the GWAS output
gwas <- w_var
colnames(gwas) <- c("CHR", "BP", "Var", "SNP")

#FINDING GENES AND QTLs ARROUND THE MARKER

#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2, 
                                            marker_file= gwas, 
                                            method = "gene",
                                            marker = "snp", 
                                            interval = 50000, 
                                            nThreads = NULL)

write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_genes_w.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_genes_w.csv")

#FINDING QTLs

out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2, 
                                          marker_file= gwas, 
                                          method = "qtl",
                                          marker = "snp", 
                                          interval = 50000, 
                                          nThreads = NULL)


write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_qtl_w.txt", 
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)

library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_qtl_w_clean.csv")

The GALLO output are bellow:

For GENES

X CHR BP Var SNP chr start_pos end_pos width strand gene_id gene_name gene_biotype
1 10 50554831 0.6660810 BTB-01434227 10 50557150 50607672 50523 + ENSBTAG00000013493 BNIP2 protein_coding
2 10 50554831 0.6660810 BTB-01434227 10 50582585 50614948 32364 + ENSBTAG00000010298 GTF2A2 protein_coding
3 10 50554831 0.6660810 BTB-01434227 10 50584841 50584967 127 - ENSBTAG00000043816 NA snoRNA
4 13 18266073 0.6006650 Hapmap50266-BTA-13664 13 18226732 18280982 54251 - ENSBTAG00000016060 CREM protein_coding
5 13 18386942 0.5282867 Hapmap49833-BTA-103929 13 18321888 18393476 71589 + ENSBTAG00000007133 CUL2 protein_coding
6 13 18509812 0.6364128 BTA-25900-no-rs 13 18449975 18466359 16385 + ENSBTAG00000052242 NA lncRNA
7 13 18509812 0.6364128 BTA-25900-no-rs 13 18484975 19062784 577810 + ENSBTAG00000014991 PARD3 protein_coding
8 13 18558540 0.6630789 ARS-BFGL-BAC-7444 13 18484975 19062784 577810 + ENSBTAG00000014991 PARD3 protein_coding
9 15 34054485 0.5549892 ARS-BFGL-NGS-2713 15 34027052 34225316 198265 + ENSBTAG00000001410 GRAMD1B protein_coding
10 15 34109962 0.6252263 ARS-BFGL-NGS-98724 15 34027052 34225316 198265 + ENSBTAG00000001410 GRAMD1B protein_coding
11 15 34144843 0.5490537 ARS-BFGL-NGS-5976 15 34027052 34225316 198265 + ENSBTAG00000001410 GRAMD1B protein_coding
12 2 115821065 1.1689686 ARS-BFGL-NGS-103753 2 115812583 115843935 31353 - ENSBTAG00000021325 SLC19A3 protein_coding
13 2 115875702 0.7398417 Hapmap54770-rs29009608 2 115812583 115843935 31353 - ENSBTAG00000021325 SLC19A3 protein_coding
14 2 115821065 1.1689686 ARS-BFGL-NGS-103753 2 115838391 115840022 1632 + ENSBTAG00000007127 NA protein_coding
15 2 115875702 0.7398417 Hapmap54770-rs29009608 2 115838391 115840022 1632 + ENSBTAG00000007127 NA protein_coding
16 2 115986085 0.8178265 ARS-BFGL-NGS-43721 2 115948258 115951955 3698 + ENSBTAG00000021326 CCL20 protein_coding
17 2 115986085 0.8178265 ARS-BFGL-NGS-43721 2 115992779 116028253 35475 + ENSBTAG00000021327 DAW1 protein_coding
18 2 116018639 0.5958551 ARS-BFGL-NGS-107330 2 115992779 116028253 35475 + ENSBTAG00000021327 DAW1 protein_coding
19 2 115622067 0.8268392 Hapmap41888-BTA-49091 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
20 2 115665427 1.2851806 ARS-BFGL-BAC-35548 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
21 2 115695003 1.6071883 BTA-49096-no-rs 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
22 2 115730530 1.3488469 ARS-BFGL-NGS-30337 2 115629949 115704725 74777 + ENSBTAG00000021323 AGFG1 protein_coding
23 2 115730530 1.3488469 ARS-BFGL-NGS-30337 2 115758046 115758105 60 + ENSBTAG00000054571 bta-mir-2285ao-1 miRNA
24 2 115821065 1.1689686 ARS-BFGL-NGS-103753 2 115791504 115808606 17103 + ENSBTAG00000050771 NA lncRNA
25 24 28487771 0.6899732 ARS-BFGL-BAC-28665 24 28485983 28486143 161 + ENSBTAG00000028575 U1 snRNA
26 24 28516684 0.7747524 Hapmap54981-rs29019846 24 28485983 28486143 161 + ENSBTAG00000028575 U1 snRNA
27 3 111677167 0.7467347 BTB-01948148 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
28 3 111708236 1.2415855 Hapmap42062-BTA-109789 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
29 3 111730561 0.9480214 BTB-01641394 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
30 3 111751663 1.0607739 ARS-BFGL-NGS-37809 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
31 3 111772736 1.2101267 ARS-BFGL-NGS-25298 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
32 3 111806406 1.0252690 ARS-BFGL-NGS-44131 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
33 3 111833768 1.1671762 ARS-BFGL-NGS-6202 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
34 3 111933069 0.6077153 ARS-BFGL-NGS-85333 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
35 3 111965305 0.7547885 ARS-BFGL-NGS-97849 3 111603940 112289188 685249 + ENSBTAG00000005784 CSMD2 protein_coding
36 3 111933069 0.6077153 ARS-BFGL-NGS-85333 3 111927003 111927727 725 - ENSBTAG00000000335 HMGB4 protein_coding
37 3 111965305 0.7547885 ARS-BFGL-NGS-97849 3 111927003 111927727 725 - ENSBTAG00000000335 HMGB4 protein_coding
38 5 6312610 0.5103844 BTA-73915-no-rs 5 6174722 6273171 98450 + ENSBTAG00000021756 ZDHHC17 protein_coding
39 5 6312610 0.5103844 BTA-73915-no-rs 5 6281008 6300975 19968 - ENSBTAG00000013406 CSRP2 protein_coding

FOR QTLs

X CHR SNP BP QTL_type start_pos end_pos QTL_ID
1 10 BTB-01434227 50554831 Health 50597586 50597590 156580
2 13 Hapmap49833-BTA-103929 18386942 Milk 18340131 18340135 116582
3 13 Hapmap49833-BTA-103929 18386942 Milk 18349683 18349687 116735
4 13 Hapmap49833-BTA-103929 18386942 Milk 18370805 18370809 249726
5 13 Hapmap49833-BTA-103929 18386942 Milk 18381731 18381735 116525
6 13 Hapmap49833-BTA-103929 18386942 Reproduction 18386940 18386944 46808
7 13 Hapmap49833-BTA-103929 18386942 Reproduction 18386940 18386944 46809
8 13 Hapmap49833-BTA-103929 18386942 Exterior 18386940 18386944 46810
9 13 Hapmap49833-BTA-103929 18386942 Exterior 18386940 18386944 46811
10 13 Hapmap49833-BTA-103929 18386942 Production 18386940 18386944 46812
11 13 Hapmap49833-BTA-103929 18386942 Exterior 18386940 18386944 46813
12 13 Hapmap49833-BTA-103929 18386942 Milk 18386940 18386944 46814
13 13 Hapmap49833-BTA-103929 18386942 Production 18386940 18386944 46815
14 13 Hapmap49833-BTA-103929 18386942 Production 18386940 18386944 46816
15 13 Hapmap49833-BTA-103929 18386942 Exterior 18386940 18386944 46817
16 13 Hapmap49833-BTA-103929 18386942 Exterior 18386940 18386944 46818
17 13 Hapmap49833-BTA-103929 18386942 Exterior 18386940 18386944 46819
18 13 Hapmap49833-BTA-103929 18386942 Reproduction 18386940 18386944 46820
19 13 Hapmap49833-BTA-103929 18386942 Reproduction 18386940 18386944 46821
20 13 Hapmap49833-BTA-103929 18386942 Exterior 18386940 18386944 46822
21 13 Hapmap49833-BTA-103929 18386942 Meat_and_Carcass 18386940 18386944 151596
22 13 Hapmap49833-BTA-103929 18386942 Meat_and_Carcass 18392222 18392226 226401
23 13 Hapmap49833-BTA-103929 18386942 Meat_and_Carcass 18392222 18392226 228662
24 13 Hapmap49833-BTA-103929 18386942 Meat_and_Carcass 18392222 18392226 234093
25 13 Hapmap49833-BTA-103929 18386942 Milk 18406821 18406825 116526
26 13 BTA-25900-no-rs 18509812 Reproduction 18489668 18489672 46823
27 13 BTA-25900-no-rs 18509812 Exterior 18489668 18489672 46824
28 13 BTA-25900-no-rs 18509812 Exterior 18489668 18489672 46825
29 13 BTA-25900-no-rs 18509812 Milk 18489668 18489672 46826
30 13 BTA-25900-no-rs 18509812 Production 18489668 18489672 46827
31 13 BTA-25900-no-rs 18509812 Exterior 18489668 18489672 46828
32 13 BTA-25900-no-rs 18509812 Exterior 18489668 18489672 46829
33 13 BTA-25900-no-rs 18509812 Production 18489668 18489672 46830
34 13 BTA-25900-no-rs 18509812 Production 18489668 18489672 46831
35 13 BTA-25900-no-rs 18509812 Exterior 18489668 18489672 46832
36 13 BTA-25900-no-rs 18509812 Exterior 18489668 18489672 46833
37 13 BTA-25900-no-rs 18509812 Reproduction 18489668 18489672 46834
38 13 BTA-25900-no-rs 18509812 Reproduction 18489668 18489672 46835
39 13 BTA-25900-no-rs 18509812 Exterior 18489668 18489672 46836
40 13 BTA-25900-no-rs 18509812 Reproduction 18509810 18509814 46837
41 13 ARS-BFGL-BAC-7444 18558540 Reproduction 18509810 18509814 46837
42 13 BTA-25900-no-rs 18509812 Reproduction 18509810 18509814 46838
43 13 ARS-BFGL-BAC-7444 18558540 Reproduction 18509810 18509814 46838
44 13 BTA-25900-no-rs 18509812 Exterior 18509810 18509814 46839
45 13 ARS-BFGL-BAC-7444 18558540 Exterior 18509810 18509814 46839
46 13 BTA-25900-no-rs 18509812 Exterior 18509810 18509814 46840
47 13 ARS-BFGL-BAC-7444 18558540 Exterior 18509810 18509814 46840
48 13 BTA-25900-no-rs 18509812 Production 18509810 18509814 46841
49 13 ARS-BFGL-BAC-7444 18558540 Production 18509810 18509814 46841
50 13 BTA-25900-no-rs 18509812 Exterior 18509810 18509814 46842
51 13 ARS-BFGL-BAC-7444 18558540 Exterior 18509810 18509814 46842
52 13 BTA-25900-no-rs 18509812 Exterior 18509810 18509814 46843
53 13 ARS-BFGL-BAC-7444 18558540 Exterior 18509810 18509814 46843
54 13 BTA-25900-no-rs 18509812 Production 18509810 18509814 46844
55 13 ARS-BFGL-BAC-7444 18558540 Production 18509810 18509814 46844
56 13 BTA-25900-no-rs 18509812 Production 18509810 18509814 46845
57 13 ARS-BFGL-BAC-7444 18558540 Production 18509810 18509814 46845
58 13 BTA-25900-no-rs 18509812 Exterior 18509810 18509814 46846
59 13 ARS-BFGL-BAC-7444 18558540 Exterior 18509810 18509814 46846
60 13 BTA-25900-no-rs 18509812 Exterior 18509810 18509814 46847
61 13 ARS-BFGL-BAC-7444 18558540 Exterior 18509810 18509814 46847
62 13 BTA-25900-no-rs 18509812 Exterior 18509810 18509814 46848
63 13 ARS-BFGL-BAC-7444 18558540 Exterior 18509810 18509814 46848
64 13 BTA-25900-no-rs 18509812 Exterior 18509810 18509814 46849
65 13 ARS-BFGL-BAC-7444 18558540 Exterior 18509810 18509814 46849
66 13 BTA-25900-no-rs 18509812 Reproduction 18509810 18509814 46850
67 13 ARS-BFGL-BAC-7444 18558540 Reproduction 18509810 18509814 46850
68 13 BTA-25900-no-rs 18509812 Health 18509810 18509814 46851
69 13 ARS-BFGL-BAC-7444 18558540 Health 18509810 18509814 46851
70 13 BTA-25900-no-rs 18509812 Reproduction 18509810 18509814 46852
71 13 ARS-BFGL-BAC-7444 18558540 Reproduction 18509810 18509814 46852
72 13 BTA-25900-no-rs 18509812 Exterior 18509810 18509814 46853
73 13 ARS-BFGL-BAC-7444 18558540 Exterior 18509810 18509814 46853
74 13 BTA-25900-no-rs 18509812 Reproduction 18558538 18558542 46854
75 13 ARS-BFGL-BAC-7444 18558540 Reproduction 18558538 18558542 46854
76 13 BTA-25900-no-rs 18509812 Exterior 18558538 18558542 46855
77 13 ARS-BFGL-BAC-7444 18558540 Exterior 18558538 18558542 46855
78 13 BTA-25900-no-rs 18509812 Exterior 18558538 18558542 46856
79 13 ARS-BFGL-BAC-7444 18558540 Exterior 18558538 18558542 46856
80 13 BTA-25900-no-rs 18509812 Milk 18558538 18558542 46857
81 13 ARS-BFGL-BAC-7444 18558540 Milk 18558538 18558542 46857
82 13 BTA-25900-no-rs 18509812 Exterior 18558538 18558542 46858
83 13 ARS-BFGL-BAC-7444 18558540 Exterior 18558538 18558542 46858
84 13 BTA-25900-no-rs 18509812 Milk 18558538 18558542 46859
85 13 ARS-BFGL-BAC-7444 18558540 Milk 18558538 18558542 46859
86 13 BTA-25900-no-rs 18509812 Production 18558538 18558542 46860
87 13 ARS-BFGL-BAC-7444 18558540 Production 18558538 18558542 46860
88 13 BTA-25900-no-rs 18509812 Production 18558538 18558542 46861
89 13 ARS-BFGL-BAC-7444 18558540 Production 18558538 18558542 46861
90 13 BTA-25900-no-rs 18509812 Milk 18558538 18558542 46862
91 13 ARS-BFGL-BAC-7444 18558540 Milk 18558538 18558542 46862
92 13 BTA-25900-no-rs 18509812 Exterior 18558538 18558542 46863
93 13 ARS-BFGL-BAC-7444 18558540 Exterior 18558538 18558542 46863
94 13 BTA-25900-no-rs 18509812 Exterior 18558538 18558542 46864
95 13 ARS-BFGL-BAC-7444 18558540 Exterior 18558538 18558542 46864
96 13 BTA-25900-no-rs 18509812 Reproduction 18558538 18558542 46865
97 13 ARS-BFGL-BAC-7444 18558540 Reproduction 18558538 18558542 46865
98 13 BTA-25900-no-rs 18509812 Reproduction 18558538 18558542 46866
99 13 ARS-BFGL-BAC-7444 18558540 Reproduction 18558538 18558542 46866
100 13 BTA-25900-no-rs 18509812 Exterior 18558538 18558542 46867
101 13 ARS-BFGL-BAC-7444 18558540 Exterior 18558538 18558542 46867
102 13 BTA-25900-no-rs 18509812 Exterior 18558538 18558542 46868
103 13 ARS-BFGL-BAC-7444 18558540 Exterior 18558538 18558542 46868
104 15 BTB-02063964 10906064 Reproduction 10936000 10936004 47797
105 15 BTB-01830390 10936002 Reproduction 10936000 10936004 47797
106 15 BTB-01608944 10974997 Reproduction 10936000 10936004 47797
107 15 BTB-02063964 10906064 Reproduction 10936000 10936004 47798
108 15 BTB-01830390 10936002 Reproduction 10936000 10936004 47798
109 15 BTB-01608944 10974997 Reproduction 10936000 10936004 47798
110 15 BTB-02063964 10906064 Exterior 10936000 10936004 47799
111 15 BTB-01830390 10936002 Exterior 10936000 10936004 47799
112 15 BTB-01608944 10974997 Exterior 10936000 10936004 47799
113 15 BTB-02063964 10906064 Milk 10936000 10936004 47800
114 15 BTB-01830390 10936002 Milk 10936000 10936004 47800
115 15 BTB-01608944 10974997 Milk 10936000 10936004 47800
116 15 BTB-02063964 10906064 Milk 10936000 10936004 47801
117 15 BTB-01830390 10936002 Milk 10936000 10936004 47801
118 15 BTB-01608944 10974997 Milk 10936000 10936004 47801
119 15 BTB-02063964 10906064 Production 10936000 10936004 47802
120 15 BTB-01830390 10936002 Production 10936000 10936004 47802
121 15 BTB-01608944 10974997 Production 10936000 10936004 47802
122 15 BTB-02063964 10906064 Production 10936000 10936004 47803
123 15 BTB-01830390 10936002 Production 10936000 10936004 47803
124 15 BTB-01608944 10974997 Production 10936000 10936004 47803
125 15 BTB-02063964 10906064 Milk 10936000 10936004 47804
126 15 BTB-01830390 10936002 Milk 10936000 10936004 47804
127 15 BTB-01608944 10974997 Milk 10936000 10936004 47804
128 15 BTB-02063964 10906064 Milk 10936000 10936004 47805
129 15 BTB-01830390 10936002 Milk 10936000 10936004 47805
130 15 BTB-01608944 10974997 Milk 10936000 10936004 47805
131 15 BTB-02063964 10906064 Reproduction 10936000 10936004 47806
132 15 BTB-01830390 10936002 Reproduction 10936000 10936004 47806
133 15 BTB-01608944 10974997 Reproduction 10936000 10936004 47806
134 15 BTB-02063964 10906064 Health 10936000 10936004 47807
135 15 BTB-01830390 10936002 Health 10936000 10936004 47807
136 15 BTB-01608944 10974997 Health 10936000 10936004 47807
137 15 BTB-02063964 10906064 Reproduction 10936000 10936004 47808
138 15 BTB-01830390 10936002 Reproduction 10936000 10936004 47808
139 15 BTB-01608944 10974997 Reproduction 10936000 10936004 47808
140 15 BTB-02063964 10906064 Exterior 10936000 10936004 47809
141 15 BTB-01830390 10936002 Exterior 10936000 10936004 47809
142 15 BTB-01608944 10974997 Exterior 10936000 10936004 47809
143 15 BTB-02063964 10906064 Exterior 10936000 10936004 47810
144 15 BTB-01830390 10936002 Exterior 10936000 10936004 47810
145 15 BTB-01608944 10974997 Exterior 10936000 10936004 47810
146 15 BTB-02063964 10906064 Exterior 10936000 10936004 47811
147 15 BTB-01830390 10936002 Exterior 10936000 10936004 47811
148 15 BTB-01608944 10974997 Exterior 10936000 10936004 47811
149 15 BTB-02063964 10906064 Reproduction 10936000 10936004 281490
150 15 BTB-01830390 10936002 Reproduction 10936000 10936004 281490
151 15 BTB-01608944 10974997 Reproduction 10936000 10936004 281490
152 15 BTB-01421892 11185546 Meat_and_Carcass 11198692 11198696 226564
153 15 BTB-01421934 11207429 Meat_and_Carcass 11198692 11198696 226564
154 15 BTB-01422008 11236303 Meat_and_Carcass 11198692 11198696 226564
155 15 ARS-BFGL-NGS-2713 34054485 Meat_and_Carcass 34025602 34025606 152600
156 15 ARS-BFGL-NGS-98724 34109962 Reproduction 34139603 34139607 62361
157 15 ARS-BFGL-NGS-5976 34144843 Reproduction 34139603 34139607 62361
158 15 ARS-BFGL-NGS-98724 34109962 Reproduction 34139603 34139607 62407
159 15 ARS-BFGL-NGS-5976 34144843 Reproduction 34139603 34139607 62407
160 15 ARS-BFGL-NGS-98724 34109962 Reproduction 34139603 34139607 62411
161 15 ARS-BFGL-NGS-5976 34144843 Reproduction 34139603 34139607 62411
162 15 ARS-BFGL-NGS-5976 34144843 Milk 34161889 34161893 155975
163 15 ARS-BFGL-NGS-5976 34144843 Reproduction 34164294 34164298 62410
164 2 ARS-BFGL-BAC-35548 115665427 Production 115695001 115695005 283322
165 2 BTA-49096-no-rs 115695003 Production 115695001 115695005 283322
166 2 ARS-BFGL-NGS-30337 115730530 Production 115695001 115695005 283322
167 2 ARS-BFGL-NGS-103753 115821065 Milk 115820324 115820328 215425
168 2 ARS-BFGL-NGS-103753 115821065 Exterior 115839213 115839217 125900
169 2 Hapmap54770-rs29009608 115875702 Exterior 115839213 115839217 125900
170 2 Hapmap54770-rs29009608 115875702 Milk 115909335 115909339 155904
171 2 Hapmap54770-rs29009608 115875702 Milk 115909359 115909363 155914
172 2 ARS-BFGL-NGS-43721 115986085 Production 115986083 115986087 39013
173 2 ARS-BFGL-NGS-107330 116018639 Production 115986083 115986087 39013
174 2 ARS-BFGL-NGS-43721 115986085 Reproduction 115986083 115986087 39014
175 2 ARS-BFGL-NGS-107330 116018639 Reproduction 115986083 115986087 39014
176 2 ARS-BFGL-NGS-43721 115986085 Exterior 115986083 115986087 39015
177 2 ARS-BFGL-NGS-107330 116018639 Exterior 115986083 115986087 39015
178 2 ARS-BFGL-NGS-43721 115986085 Exterior 115986083 115986087 39016
179 2 ARS-BFGL-NGS-107330 116018639 Exterior 115986083 115986087 39016
180 2 ARS-BFGL-NGS-43721 115986085 Milk 115986083 115986087 39017
181 2 ARS-BFGL-NGS-107330 116018639 Milk 115986083 115986087 39017
182 2 ARS-BFGL-NGS-43721 115986085 Production 115986083 115986087 39018
183 2 ARS-BFGL-NGS-107330 116018639 Production 115986083 115986087 39018
184 2 ARS-BFGL-NGS-43721 115986085 Exterior 115986083 115986087 39019
185 2 ARS-BFGL-NGS-107330 116018639 Exterior 115986083 115986087 39019
186 2 ARS-BFGL-NGS-43721 115986085 Milk 115986083 115986087 39020
187 2 ARS-BFGL-NGS-107330 116018639 Milk 115986083 115986087 39020
188 2 ARS-BFGL-NGS-43721 115986085 Production 115986083 115986087 39021
189 2 ARS-BFGL-NGS-107330 116018639 Production 115986083 115986087 39021
190 2 ARS-BFGL-NGS-43721 115986085 Production 115986083 115986087 39022
191 2 ARS-BFGL-NGS-107330 116018639 Production 115986083 115986087 39022
192 2 ARS-BFGL-NGS-43721 115986085 Milk 115986083 115986087 39023
193 2 ARS-BFGL-NGS-107330 116018639 Milk 115986083 115986087 39023
194 2 ARS-BFGL-NGS-43721 115986085 Milk 115986083 115986087 39024
195 2 ARS-BFGL-NGS-107330 116018639 Milk 115986083 115986087 39024
196 2 ARS-BFGL-NGS-43721 115986085 Exterior 115986083 115986087 39025
197 2 ARS-BFGL-NGS-107330 116018639 Exterior 115986083 115986087 39025
198 2 ARS-BFGL-NGS-43721 115986085 Exterior 115986083 115986087 39026
199 2 ARS-BFGL-NGS-107330 116018639 Exterior 115986083 115986087 39026
200 2 ARS-BFGL-NGS-43721 115986085 Reproduction 115986083 115986087 39027
201 2 ARS-BFGL-NGS-107330 116018639 Reproduction 115986083 115986087 39027
202 2 ARS-BFGL-NGS-43721 115986085 Exterior 115986083 115986087 39028
203 2 ARS-BFGL-NGS-107330 116018639 Exterior 115986083 115986087 39028
204 20 ARS-BFGL-NGS-3276 12087403 Health 12088254 12088258 179019
205 20 ARS-BFGL-NGS-78615 12111883 Health 12088254 12088258 179019
206 24 ARS-BFGL-BAC-28665 28487771 Meat_and_Carcass 28473214 28473218 232857
207 24 Hapmap54981-rs29019846 28516684 Meat_and_Carcass 28473214 28473218 232857
208 24 BTB-01485274 28540641 Health 28570243 28570247 57038
209 24 Hapmap58887-rs29013502 28570245 Health 28570243 28570247 57038
210 24 BTB-01646599 28604672 Health 28570243 28570247 57038
211 24 BTB-01485274 28540641 Health 28570243 28570247 57040
212 24 Hapmap58887-rs29013502 28570245 Health 28570243 28570247 57040
213 24 BTB-01646599 28604672 Health 28570243 28570247 57040
214 24 BTB-01485274 28540641 Production 28570243 28570247 69281
215 24 Hapmap58887-rs29013502 28570245 Production 28570243 28570247 69281
216 24 BTB-01646599 28604672 Production 28570243 28570247 69281
217 24 Hapmap58887-rs29013502 28570245 Reproduction 28604670 28604674 138598
218 24 BTB-01646599 28604672 Reproduction 28604670 28604674 138598
219 3 BTB-01948148 111677167 Reproduction 111708234 111708238 30008
220 3 Hapmap42062-BTA-109789 111708236 Reproduction 111708234 111708238 30008
221 3 BTB-01641394 111730561 Reproduction 111708234 111708238 30008
222 3 ARS-BFGL-NGS-37809 111751663 Reproduction 111708234 111708238 30008
223 3 BTB-01948148 111677167 Reproduction 111708234 111708238 30244
224 3 Hapmap42062-BTA-109789 111708236 Reproduction 111708234 111708238 30244
225 3 BTB-01641394 111730561 Reproduction 111708234 111708238 30244
226 3 ARS-BFGL-NGS-37809 111751663 Reproduction 111708234 111708238 30244
227 3 BTB-01948148 111677167 Meat_and_Carcass 111708234 111708238 152258
228 3 Hapmap42062-BTA-109789 111708236 Meat_and_Carcass 111708234 111708238 152258
229 3 BTB-01641394 111730561 Meat_and_Carcass 111708234 111708238 152258
230 3 ARS-BFGL-NGS-37809 111751663 Meat_and_Carcass 111708234 111708238 152258
231 3 BTB-01948148 111677167 Meat_and_Carcass 111717521 111717525 225527
232 3 Hapmap42062-BTA-109789 111708236 Meat_and_Carcass 111717521 111717525 225527
233 3 BTB-01641394 111730561 Meat_and_Carcass 111717521 111717525 225527
234 3 ARS-BFGL-NGS-37809 111751663 Meat_and_Carcass 111717521 111717525 225527
235 3 ARS-BFGL-NGS-97849 111965305 Meat_and_Carcass 111991214 111991218 151880
236 3 ARS-BFGL-NGS-97849 111965305 Meat_and_Carcass 111991214 111991218 152096
237 5 BTA-73915-no-rs 6312610 Reproduction 6318394 6318398 212544

QTL type

#plotting the percentage of each QTL class annoatted
oldpar <- par(mar=c(1,5,0.5,1))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1.5)

My Image

Production QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Production")

My Image

Reproduction QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")

My Image

Milk QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Milk")

My Image

Meat and Carcass QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Meat_and_Carcass")

My Image

Health QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")

My Image

Exterior QTLs

#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Exterior")

My Image

9.4.2.1 Windows - QTL enrichment on GALLO

#QTL enrichment analysis 
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "Name",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)


# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]

write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_name_w.csv")

out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2, 
                                 qtl_file= out.qtl, qtl_type = "QTL_type",
                                 enrich_type = "genome", chr.subset = NULL, 
                                 padj = "fdr",nThreads = 2)

sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/out_enrich_qtl_genome_type_w.csv")


#Plots

#Name

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")


#Type

#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")

#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]

#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")

QTL Enrichment outcomes

Enrichment by name (enrichment analysis will be performed for each trait individually)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval QTL_type
11 Foot angle 6 672 128 163224 0.0000169 0.0005655 Exterior
32 Rear leg placement - side view 5 430 128 163224 0.0000251 0.0005655 Exterior
6 Calving ease 12 3819 128 163224 0.0000513 0.0005773 Reproduction
31 Rear leg placement - rear view 5 491 128 163224 0.0000471 0.0005773 Exterior
28 Net merit 6 903 128 163224 0.0000863 0.0007764 Production
37 Stillbirth 7 1363 128 163224 0.0001097 0.0008227 Reproduction
9 Feet and leg conformation 5 627 128 163224 0.0001476 0.0009488 Exterior
44 Udder depth 5 695 128 163224 0.0002371 0.0013337 Exterior
30 PTA type 4 627 128 163224 0.0015793 0.0078963 Production
43 Udder attachment 4 655 128 163224 0.0018503 0.0083263 Exterior
25 Milk tridecylic acid content 3 319 128 163224 0.0021074 0.0084756 Milk
40 Teat placement - front 3 327 128 163224 0.0022602 0.0084756 Exterior
10 Fertility index 2 108 128 163224 0.0033389 0.0115577 Reproduction
12 Head width 1 6 128 163224 0.0046960 0.0150944 Production
17 Length of productive life 6 2004 128 163224 0.0051861 0.0155583 Production
33 Respiratory rate 1 9 128 163224 0.0070359 0.0197884 Health
34 Rump conformation 1 13 128 163224 0.0101471 0.0268600 Exterior
38 Strength 3 664 128 163224 0.0157176 0.0392939 Exterior
3 Body temperature 1 23 128 163224 0.0178830 0.0423545 Health
26 Muscle calcium content 1 40 128 163224 0.0308966 0.0695174 Meat and Carcass
27 Muscle sodium content 1 56 128 163224 0.0429884 0.0921180 Meat and Carcass
14 Interval from first to last insemination 2 445 128 163224 0.0481346 0.0984572 Reproduction
15 Interval to first estrus after calving 3 1053 128 163224 0.0505607 0.0989232 Reproduction
45 Udder height 2 504 128 163224 0.0599662 0.1124366 Exterior
16 Intramuscular fat 1 117 128 163224 0.0877305 0.1579149 Meat and Carcass
8 Fat thickness at the 12th rib 1 169 128 163224 0.1242283 0.2150105 Meat and Carcass
22 Milk glycosylated kappa-casein percentage 4 2527 128 163224 0.1380822 0.2301370 Milk
1 Age at first calving 1 233 128 163224 0.1671652 0.2686584 Reproduction
39 Teat length 1 300 128 163224 0.2098779 0.3148168 Exterior
41 Teat placement - rear 1 298 128 163224 0.2086349 0.3148168 Exterior
36 Somatic cell score 2 1122 128 163224 0.2199895 0.3193396 Health
18 M. paratuberculosis susceptibility 1 492 128 163224 0.3206093 0.4508568 Health
2 Body depth 1 616 128 163224 0.3837908 0.5233511 Production
19 Marbling score 2 1817 128 163224 0.4175991 0.5369131 Meat and Carcass
35 Shear force 3 2954 128 163224 0.4091307 0.5369131 Meat and Carcass
24 Milk protein yield 3 3093 128 163224 0.4380445 0.5475556 Milk
13 Inseminations per conception 1 790 128 163224 0.4627346 0.5627854 Reproduction
29 Pregnancy rate 1 944 128 163224 0.5241831 0.6207431 Reproduction
5 Bovine tuberculosis susceptibility 1 1155 128 163224 0.5972036 0.6890811 Health
4 Body weight 1 4289 128 163224 0.9669506 0.9713008 Production
7 Connective tissue amount 1 3142 128 163224 0.9170032 0.9713008 Meat and Carcass
20 Milk fat percentage 5 10941 128 163224 0.9357122 0.9713008 Milk
21 Milk fat yield 4 8220 128 163224 0.8906967 0.9713008 Milk
23 Milk protein percentage 3 8803 128 163224 0.9713008 0.9713008 Milk
42 Tenderness score 1 3483 128 163224 0.9368355 0.9713008 Meat and Carcass

My Image

Enrichment by QTL_type (enrichment processes performed for the QTL classes)

X QTL N_QTLs N_QTLs_db Total_annotated_QTLs Total_QTLs_db pvalue adj.pval ID
1 Exterior 41 9077 128 163224 0.0000000 0.0000000 Exterior - CHR
2 Health 6 5889 128 163224 0.3161000 0.6049498 Health - CHR
3 Meat and Carcass 11 18258 128 163224 0.8597556 1.0000000 Meat and Carcass - CHR
4 Milk 22 75352 128 163224 1.0000000 1.0000000 Milk - CHR
5 Production 19 19640 128 163224 0.1968827 0.5906482 Production - CHR
6 Reproduction 29 35008 128 163224 0.4032999 0.6049498 Reproduction - CHR

My Image

9.5 Match Significant p-value and Windows variance bigger than 0.5%

#Only significant SNPs after Genome Independent Segments adjust
sigPvalue <- read.csv("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval_SNPid_ind_seg_sig_BLUPF90.csv")
sigPvalue <- sigPvalue[, c("CHR", "BP", "LOG_P", "SNP_ID")]

w_var <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/chrsnpvar", header = F)
w_var <- filter(w_var, V3 > 0.5)
colnames(w_var) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")

snp_map <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/snpmap.txt_clean", header = T)

# Fazer o merge baseado em duas condições: CHR e POS
merged_data <- merge(w_var, snp_map, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
w_var <- merged_data[,c("CHR", "BP", "Var", "SNP_ID")]

# Get indices in w_var where SNP_ID matches those in sigPvalue
indices <- which(w_var$SNP_ID %in% sigPvalue$SNP_ID)

# Optionally extract the rows from w_var based on these indices
matching_rows <- w_var[indices, ]

write.csv(matching_rows, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/pvalue_windowsVar_match.csv")
write.csv(w_var, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/windowsVar_min05.csv")

sum(matching_rows$Var)

The 3 SNPs bellow were significant associated to cortisol considering the p-value adjusted for genome independent segments and also individually explain more than 0.5% of the variance.

X CHR BP Var SNP_ID
36 3 111730561 0.9480214 BTB-01641394
37 3 111751663 1.0607739 ARS-BFGL-NGS-37809
38 3 111772736 1.2101267 ARS-BFGL-NGS-25298

These 3 SNPs explain together 3,22% of Variance and all of them fall in the gene CSMD2

I checked if the genes found surrounding SNPs in “Regular Blup” and “Windows Blup” were the same.

regGwas <- read.csv("/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_genes_50k.csv")

common_genes <- out.genes[out.genes$gene_id %in% regGwas$gene_id, ]

write.csv(common_genes, "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/windows10/common_genes.csv")

And the unique common gene surrounding the SNPs in both analysis were also CSMD2

9.6 Allele frequency for GWAS output

I created the code bellow to find out the allele frequency for those most significant SNPs considering the two different methodologies, the “regular”ssGWAS and the window ssGWAS.

# Bringing SNP_ID to SNP_Frequency
snpmap <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/snpmap.txt_clean", header=T)
snpfreq <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/freqdata.count")
map_freq <- merge(snpmap, snpfreq, by.x = "CHR", by.y = "V1")
colnames(map_freq) <- c("CHR", "POS", "SNP_ID", "FREQ")
write.csv(map_freq, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/allele_freq.csv")


# Output for Variance explained by Window
snpWvar <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/chrsnpvar", header = F)
snpWvar <- filter(snpWvar, V3 > 0.5)
colnames(snpWvar) <- c("V1", "V2", "Var", "SNP", "CHR", "BP")
merged_data <- merge(snpWvar, snpmap, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
snpWvar <- merged_data[,c("CHR", "BP", "Var", "SNP_ID")]
var_freq <- merge(snpWvar, map_freq, by.x = "SNP_ID", by.y="SNP_ID") 
write.csv(var_freq, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/window_10/w_allele_freq.csv")

# Output for significant p-value

#Estimating Bonferroni for genome independent segments
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")

library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
  filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
  summarise(total_length = sum(`Seq length`)) %>%
  pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)

#import MARKER files = the GWAS output
snpPval = read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval")
colnames(snpPval) <- c("V1", "V2", "LOG_P", "SNP", "CHR", "BP")
snpPval <- filter(snpPval, LOG_P >= bonf)
snpPval <- snpPval[,c("LOG_P", "SNP", "CHR", "BP")]
merged_data2 <- merge(snpPval, snpmap, by.x = c("CHR", "BP"), by.y = c("CHR", "POS"), all.x = TRUE)
snpPval <- merged_data2[,c("CHR", "BP", "LOG_P", "SNP_ID")]
pval_freq <- merge(snpPval, map_freq, by.x = "SNP_ID", by.y="SNP_ID") 
write.csv(pval_freq, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/pval_allele_freq.csv")

9.6.1 “Regular” ssGWAS

X SNP_ID CHR.x BP LOG_P CHR.y POS FREQ
1 ARS-BFGL-NGS-110705 4 103979600 5.591006 4 103979600 0.345070
2 ARS-BFGL-NGS-112149 17 48800954 5.268940 17 48800954 0.823944
3 ARS-BFGL-NGS-112210 21 4055731 4.493938 21 4055731 0.281690
4 ARS-BFGL-NGS-115605 23 44822126 4.316434 23 44822126 0.521127
5 ARS-BFGL-NGS-116552 28 20016672 5.130557 28 20016672 0.239437
6 ARS-BFGL-NGS-118207 3 110866523 4.815943 3 110866523 0.295775
7 ARS-BFGL-NGS-12139 4 50755462 5.015905 4 50755462 0.345070
8 ARS-BFGL-NGS-12510 17 46487068 4.547766 17 46487068 0.823944
9 ARS-BFGL-NGS-25298 3 111772736 5.088738 3 111772736 0.295775
10 ARS-BFGL-NGS-30515 15 57930281 4.514377 15 57930281 0.394366
11 ARS-BFGL-NGS-37809 3 111751663 4.390707 3 111751663 0.295775
12 ARS-BFGL-NGS-41631 29 33039863 4.933140 29 33039863 0.915493
13 ARS-BFGL-NGS-45265 4 107787202 5.375628 4 107787202 0.345070
14 ARS-BFGL-NGS-71077 28 35807366 4.695875 28 35807366 0.239437
15 ARS-BFGL-NGS-74948 3 111526170 4.760351 3 111526170 0.295775
16 ARS-BFGL-NGS-82859 14 15929822 5.467439 14 15929822 0.330986
17 ARS-BFGL-NGS-87412 17 47884497 6.622354 17 47884497 0.823944
18 ARS-BFGL-NGS-91119 20 65351653 4.891206 20 65351653 0.922535
19 BTB-00096979 2 41602429 4.497977 2 41602429 0.091549
20 BTB-00170785 4 27094376 5.485063 4 27094376 0.345070
21 BTB-00488482 12 17477322 4.417576 12 17477322 0.035211
22 BTB-00594449 15 33066384 5.041271 15 33066384 0.394366
23 BTB-01148543 7 110306791 4.609691 7 110306791 0.985915
24 BTB-01341053 20 60365668 4.603817 20 60365668 0.922535
25 BTB-01641394 3 111730561 4.390707 3 111730561 0.295775
26 Hapmap23854-BTC-062412 6 36184467 4.758110 6 36184467 0.218310
27 Hapmap47669-BTA-59022 24 857728 4.458018 24 857728 0.035211
28 Hapmap48062-BTA-72409 4 109568557 4.709882 4 109568557 0.345070
29 Hapmap48777-BTA-47434 2 41670655 4.377284 2 41670655 0.091549
30 Hapmap50554-BTA-107048 4 25933587 4.352451 4 25933587 0.345070
31 Hapmap53065-rs29026778 2 118820218 4.420962 2 118820218 0.091549
32 Hapmap55558-rs29013980 11 19779915 4.828190 11 19779915 0.070423
33 Hapmap58854-rs29023486 4 95650788 4.825103 4 95650788 0.345070
34 Hapmap59011-rs29027498 4 26511448 4.537662 4 26511448 0.345070
35 Hapmap59743-rs29017061 4 27021431 5.045447 4 27021431 0.345070
36 Hapmap60503-rs29018741 4 7873471 4.610767 4 7873471 0.345070
37 Hapmap60681-rs29013301 4 24239851 4.641241 4 24239851 0.345070
38 UA-IFASA-2147 4 110053134 5.245598 4 110053134 0.345070

9.6.2 Windows ssGWAS

X SNP_ID CHR.x BP Var CHR.y POS FREQ
1 ARS-BFGL-BAC-28665 24 28487771 0.6899732 24 28487771 0.035211
2 ARS-BFGL-BAC-35548 2 115665427 1.2851806 2 115665427 0.091549
3 ARS-BFGL-BAC-7444 13 18558540 0.6630789 13 18558540 0.605634
4 ARS-BFGL-NGS-103753 2 115821065 1.1689686 2 115821065 0.091549
5 ARS-BFGL-NGS-107330 2 116018639 0.5958551 2 116018639 0.091549
6 ARS-BFGL-NGS-25298 3 111772736 1.2101267 3 111772736 0.295775
7 ARS-BFGL-NGS-2713 15 34054485 0.5549892 15 34054485 0.394366
8 ARS-BFGL-NGS-30337 2 115730530 1.3488469 2 115730530 0.091549
9 ARS-BFGL-NGS-3276 20 12087403 0.5722138 20 12087403 0.922535
10 ARS-BFGL-NGS-37809 3 111751663 1.0607739 3 111751663 0.295775
11 ARS-BFGL-NGS-43721 2 115986085 0.8178265 2 115986085 0.091549
12 ARS-BFGL-NGS-44131 3 111806406 1.0252690 3 111806406 0.295775
13 ARS-BFGL-NGS-5141 24 28417928 0.5945940 24 28417928 0.035211
14 ARS-BFGL-NGS-5976 15 34144843 0.5490537 15 34144843 0.394366
15 ARS-BFGL-NGS-6202 3 111833768 1.1671762 3 111833768 0.295775
16 ARS-BFGL-NGS-78615 20 12111883 0.5689079 20 12111883 0.922535
17 ARS-BFGL-NGS-85333 3 111933069 0.6077153 3 111933069 0.295775
18 ARS-BFGL-NGS-97849 3 111965305 0.7547885 3 111965305 0.295775
19 ARS-BFGL-NGS-98724 15 34109962 0.6252263 15 34109962 0.394366
20 BTA-25900-no-rs 13 18509812 0.6364128 13 18509812 0.605634
21 BTA-49096-no-rs 2 115695003 1.6071883 2 115695003 0.091549
22 BTA-73915-no-rs 5 6312610 0.5103844 5 6312610 0.218310
23 BTA-91816-no-rs 15 11100934 0.9739683 15 11100934 0.394366
24 BTB-01421844 15 11144666 0.7131750 15 11144666 0.394366
25 BTB-01421892 15 11185546 0.6463689 15 11185546 0.394366
26 BTB-01421934 15 11207429 0.8898567 15 11207429 0.394366
27 BTB-01422008 15 11236303 0.6832306 15 11236303 0.394366
28 BTB-01434227 10 50554831 0.6660810 10 50554831 0.929577
29 BTB-01485274 24 28540641 0.8983644 24 28540641 0.035211
30 BTB-01608944 15 10974997 1.1206945 15 10974997 0.394366
31 BTB-01641394 3 111730561 0.9480214 3 111730561 0.295775
32 BTB-01646599 24 28604672 0.5894579 24 28604672 0.035211
33 BTB-01813405 15 10879514 0.5556342 15 10879514 0.394366
34 BTB-01830390 15 10936002 1.0480277 15 10936002 0.394366
35 BTB-01948148 3 111677167 0.7467347 3 111677167 0.295775
36 BTB-02063964 15 10906064 0.8788211 15 10906064 0.394366
37 Hapmap41888-BTA-49091 2 115622067 0.8268392 2 115622067 0.091549
38 Hapmap42062-BTA-109789 3 111708236 1.2415855 3 111708236 0.295775
39 Hapmap49833-BTA-103929 13 18386942 0.5282867 13 18386942 0.605634
40 Hapmap50266-BTA-13664 13 18266073 0.6006650 13 18266073 0.605634
41 Hapmap54770-rs29009608 2 115875702 0.7398417 2 115875702 0.091549
42 Hapmap54981-rs29019846 24 28516684 0.7747524 24 28516684 0.035211
43 Hapmap58887-rs29013502 24 28570245 0.7619213 24 28570245 0.035211

10 Genetic Correlation

To assess the correlation between Cortisol phenotypes and Genomic Estimated Breeding Values (GEBVs), we opt for a linear regression instead of a standard correlation test. This decision is driven by the non-normal distribution of our Cortisol phenotypes, which violates the assumptions required for traditional correlation tests.

Linear regression offers a robust alternative as it does not necessitate normality for the dependent variable. By regressing GEBVs over Cortisol, we can model the relationship between these variables. Our aim is to estimate the regression coefficient, which serves as our correlation estimate.

Due to the violation of normality assumptions for the dependent variable (Cortisol), traditional correlation tests may not provide reliable results, particularly in assessing the significance of the correlation. Therefore, alternative approaches, such as linear regression, are preferred as they do not require the same assumptions about the distribution of the dependent variable. By using linear regression, we can still assess the relationship between Cortisol and GEBVs while accommodating the non-normality of Cortisol phenotypes.

The regression model can be represented as follows: \[ y = \beta_0 + \beta_1 \times GEBV_{\text{Milk}} + \epsilon \]

Where:

This approach enables us to quantify the relationship between Cortisol and GEBVs, addressing the non-normality of Cortisol phenotypes while allowing for formal hypothesis testing of the correlation’s significance.

10.1 Data preparation

The first data I received from Lucas had only 135 animals out of 260 with values the other 125 had only NA I shown this to Lucas Lucas wrote to Alisson Lucas sent me the missing animals I merged this two files

rm(list = ls())

# Load the necessary library
library(dplyr)
library(tidyverse)

cortisol_260 <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")

#This is the first dataframe with information for 135 animals and 125 NA
GEBVs1 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora.csv")
#This is the second file with information for the 125 NA animals
GEBVs2 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/elora_missing_females_2404_06_11_2024.csv")
#This are de columns we can use because we know the meaning of the acronyms
GEBVs_to_use <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebv_names_lucas_06102024_BAG.csv")


sum(is.na(GEBVs1$MILK))
GEBVs1<- GEBVs1[which(is.na(GEBVs1[,"DHI_BARN_NAME"]) == F),]

sum(!is.na(GEBVs2$MILK))
GEBVs2<- GEBVs2[which(is.na(GEBVs2[,"DHI_BARN_NAME"]) == F),]

print(GEBVs1$DHI_BARN_NAME)
print(GEBVs2$DHI_BARN_NAME)

# Making the two dataframes with the same columns
# Remove elora_id and international_id from GEBVs1
GEBVs1 <- GEBVs1 %>% select(-elora_id, -international_id)

# Remove ANIMAL_ID from GEBVs2
GEBVs2 <- GEBVs2 %>% select(-ANIMAL_ID)

# Check if the two dataframes have the same columns
have_same_columns <- all(names(GEBVs1) == names(GEBVs2))

if (have_same_columns) {
  print("The dataframes have the same columns.")
} else {
  print("The dataframes do not have the same columns.")
}


# Check if the column names are in the same order
same_order <- identical(names(GEBVs1), names(GEBVs2))

if (same_order) {
  print("The columns are in the same order.")
} else {
  print("The columns are not in the same order.")
}

GEBVs_combined <- rbind(GEBVs1, GEBVs2)

# Sort the columns
sorted_cortisol_260 <- sort(cortisol_260$ID)
sorted_GEBVs_combined <- sort(GEBVs_combined$DHI_BARN_NAME)

# Check if the sorted columns have the same values
identical(sorted_cortisol_260, sorted_GEBVs_combined)

# Create a duplicate of the column 'DHI_BARN_NAME' and name it 'elora_id'
GEBVs_combined$elora_id <- GEBVs_combined$DHI_BARN_NAME

# Assuming GEBVs_combined is your data frame
GEBVs_combined <- GEBVs_combined %>%
  select(elora_id, DHI_BARN_NAME, everything())

write.csv(GEBVs_combined, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora_complete.csv")

# Merging the dataframe with Cortisol values, with the dataframe with GEBVs values
Merg_Cort_GEBVs <- merge(cortisol_260, GEBVs_combined, by.x = "ID", by.y = "elora_id")

write.csv(Merg_Cort_GEBVs, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")

#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")

colnames(Merg_Cort_GEBVs)[405] <- "IDD"

data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, all_of(Columns_to_use))

# The data below has the the 55 GEBVs + Cortisol data + Birth Year
write.csv(data, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits.csv")

samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")

# Convert Sampling_date to Date using as.Date
samp_date$Sampling_date <- as.Date(samp_date$Sampling_date, format = "%m/%d/%Y")

table(samp_date$Sampling_date)

samp_date <- select(samp_date, Elora_id, Sampling_date)

# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)

data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")

data_final <- data_final %>%
  select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date, everything())

# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")

ps. I double checked by hand the select and merge process against the original tables received and is everything ok.

10.2 Correlations - Linear Regression

10.2.1 Test with one trait

# Fit the linear regression model
model <- lm(T4Cortisol ~ MILK, data = data)

# Summarize the model to get the regression coefficients and statistical summary
model_summary <- summary(model)

# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1]
p_value <- pf(model_summary$fstatistic[1], model_summary$fstatistic[2], model_summary$fstatistic[3], lower.tail = FALSE)

# Combine the statistics into a data frame
results <- data.frame(
  Multiple_R_Squared = multiple_r_squared,
  Adjusted_R_Squared = adjusted_r_squared,
  F_Statistic = f_statistic,
  P_Value = p_value
)

# Save the results to a CSV file
write.csv(results, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_milk.csv", row.names = FALSE)

# Plot the linear regression between T4Cortisol and MILK
ggplot(data, aes(x = MILK, y = T4Cortisol)) +
  geom_point() +  # Add points
  geom_smooth(method = "lm", se = FALSE, color = "blue") +  # Add linear regression line
  labs(title = "Linear Regression of T4Cortisol on MILK",
       x = "MILK",
       y = "T4Cortisol") +
  theme_minimal()

Plot linear regression Cortisol vs. Milk GEBV My Image

Summary statistics

Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value
5.2e-05 -0.0038237 0.0134257 0.9078463

10.2.2 Loop for all traits

As we have 54 GEBVs to fit a linear regression we designed a loop:

# Initialize a list to store the results
results_list <- list()

# Loop through columns 4 to ncol(data) for the GEBVs
for (i in 4:ncol(data)) {
  trait_name <- colnames(data)[i]
  
  # Fit the linear regression model
  model <- lm(data[[2]] ~ data[[i]], data = data)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 3]] <- result
}

# Combine all results into a single data frame
results_df_0 <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
BMR 0.0400473 0.0363266 10.7632421 0.0011781 21.1128882 0.0011781
CO 0.0280949 0.0243278 7.4580136 0.0067510 -13.2769834 0.0067510
MSL 0.0198445 0.0160455 5.2235464 0.0230946 -16.9454262 0.0230946
CTFS 0.0185227 0.0147186 4.8690531 0.0282237 15.8469990 0.0282237
UT 0.0166425 0.0128310 4.3664218 0.0376334 -15.6741466 0.0376334
MS 0.0128559 0.0090298 3.3600230 0.0679494 -14.7558389 0.0679494
CK 0.0106156 0.0067808 2.7682148 0.0973674 14.6866464 0.0973674
IH 0.0101222 0.0062855 2.6382359 0.1055404 -7.3062481 0.1055404
DD 0.0100161 0.0061790 2.6103042 0.1073935 -9.5352332 0.1073935
CONF 0.0095452 0.0057062 2.4863924 0.1160600 -13.9681677 0.1160600
UD 0.0085295 0.0046866 2.2195470 0.1374945 -15.7518165 0.1374945
SU 0.0077590 0.0039131 2.0174678 0.1567056 7.3716271 0.1567056
MT 0.0076274 0.0037810 1.9829901 0.1602796 -10.6502458 0.1602796
LP 0.0074242 0.0035770 1.9297608 0.1659824 9.2112199 0.1659824
MET 0.0070642 0.0032156 1.8355367 0.1766601 -7.6934858 0.1766601
MR 0.0066755 0.0028254 1.7338637 0.1890867 -7.8752018 0.1890867
DO 0.0060791 0.0022267 1.5779921 0.2101864 9.5682702 0.2101864
DHL 0.0052895 0.0014340 1.3719419 0.2425591 -9.4618477 0.2425591
MOB 0.0047368 0.0008792 1.2279220 0.2688435 7.8139790 0.2688435
DF 0.0046402 0.0007822 1.2027541 0.2737945 7.8603320 0.2737945
FAT 0.0046204 0.0007623 1.1975980 0.2748229 1.0090961 0.2748229
SH 0.0043235 0.0004643 1.1203076 0.2908422 5.2472606 0.2908422
DS 0.0043121 0.0004528 1.1173348 0.2914818 -7.5550660 0.2914818
FOOT 0.0043005 0.0004412 1.1143288 0.2921304 26.7441735 0.2921304
FA 0.0042762 0.0004168 1.1079850 0.2935052 -8.0817790 0.2935052
PROT 0.0040026 0.0001421 1.0368190 0.3095163 1.2915168 0.3095163
WL 0.0038262 -0.0000350 0.9909414 0.3204451 5.2197508 0.3204451
MDR 0.0038096 -0.0000516 0.9866366 0.3214965 7.0545996 0.3214965
CA 0.0036379 -0.0002239 0.9420177 0.3326685 7.4603427 0.3326685
ME 0.0034176 -0.0004452 0.8847561 0.3477821 -5.0950658 0.3477821
SCS 0.0031318 -0.0007321 0.8105371 0.3688011 -4.8756313 0.3688011
FE 0.0028874 -0.0009774 0.7470985 0.3881993 -5.1774356 0.3881993
FTP 0.0027874 -0.0010778 0.7211475 0.3965550 9.8351267 0.3965550
AFS 0.0025212 -0.0013450 0.6521188 0.4201001 -5.9185799 0.4201001
UF 0.0018686 -0.0020001 0.4830008 0.4876916 6.8229142 0.4876916
ST 0.0018050 -0.0020640 0.4665276 0.4952019 -7.1228081 0.4952019
MSP 0.0014890 -0.0023812 0.3847251 0.5356327 -3.5290172 0.5356327
HL 0.0013205 -0.0025503 0.3411423 0.5596809 -4.3615962 0.5596809
SCK 0.0011411 -0.0027305 0.2947310 0.5876733 3.3934702 0.5876733
TL 0.0009008 -0.0029717 0.2326179 0.6299983 -5.0172076 0.6299983
LOC 0.0008542 -0.0030185 0.2205721 0.6390011 -3.5628830 0.6390011
DA 0.0006826 -0.0031907 0.1762328 0.6749803 3.5280623 0.6749803
CW 0.0005859 -0.0032878 0.1512479 0.6976664 -2.7721164 0.6976664
BCS 0.0003594 -0.0035151 0.0927672 0.7609338 2.1412729 0.7609338
RUM 0.0003149 -0.0035598 0.0812707 0.7758114 -1.6952340 0.7758114
TU 0.0002204 -0.0036547 0.0568793 0.8116875 1.0400512 0.8116875
FL 0.0001793 -0.0036960 0.0462618 0.8298705 1.5296709 0.8298705
BQ 0.0001697 -0.0037056 0.0437957 0.8343993 -1.5434870 0.8343993
HHE 0.0000521 -0.0038236 0.0134478 0.9077707 0.6475671 0.9077707
MILK 0.0000520 -0.0038237 0.0134257 0.9078463 0.0042447 0.9078463
HH 0.0000509 -0.0038249 0.0131340 0.9088482 -0.6606061 0.9088482
BD 0.0000293 -0.0038466 0.0075535 0.9308097 -0.6433874 0.9308097
DCA 0.0000046 -0.0038713 0.0011932 0.9724707 -0.2685932 0.9724707
FEED 0.0000001 -0.0038759 0.0000152 0.9968903 -0.0190613 0.9968903
Five out 54 traits GEBVs presented significant correlations with Cortisol:
  • BMR = Body Maintenance Requirements
  • CO = Cystic ovaries
  • MSL = Median Suspensory Ligament
  • CTFS = Calving to First Service
  • UT = Udder Texture

10.2.3 Adding BIRTH_YEAR

10.2.3.1 Model Description

The regression model added the BY is shown bellow:

\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + \epsilon \]

Where:

  • \(y\) represents Cortisol phenotypes.
  • \(GEBV_{\text{Trait}}\) denotes the GEBV for the specific trait (e.g., Milk Yield).
  • \(BIRTH\_YEAR\) is the birth year of the subjects, included as a factor.
  • \(\beta_0\) and \(\beta_1\) are the intercept and regression coefficient, respectively.
  • \(\epsilon\) represents the error term capturing unexplained variability.

The BIRTH_YEAR variable is converted to a factor to account for the categorical nature of birth years.

# Convert BIRTH_YEAR to a factor and rename
data$BIRTH_YEAR <- as.factor(data$BIRTH_YEAR)

# Initialize a list to store the results
results_list <- list()

# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 4:ncol(data)) {
  trait_name <- colnames(data)[i]
  
  # Fit the linear regression model with BIRTH_YEAR as an additional predictor
  model <- lm(data[[2]] ~ data[[i]] + BIRTH_YEAR, data = data)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 2]] <- result
}

# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
BMR 0.0881915 0.0738886 6.165997 0.0000945 19.1527245 0.0028131
CO 0.0803421 0.0659161 5.569252 0.0002590 -12.5790675 0.0094097
MSL 0.0734487 0.0589145 5.053527 0.0006186 -16.2613977 0.0277614
IH 0.0719150 0.0573568 4.939831 0.0007494 -9.3449051 0.0354801
CTFS 0.0705440 0.0559642 4.838504 0.0008891 14.3231800 0.0442704
UT 0.0704128 0.0558310 4.828827 0.0009037 -14.8606501 0.0452229
MS 0.0686230 0.0540132 4.697043 0.0011284 -14.8462730 0.0606040
CK 0.0670600 0.0524256 4.582365 0.0013689 15.4450943 0.0785803
PROT 0.0658116 0.0511577 4.491053 0.0015963 2.1108214 0.0970292
DD 0.0638377 0.0491528 4.347166 0.0020332 -8.6326827 0.1365405
FAT 0.0628176 0.0481167 4.273045 0.0023028 1.2796392 0.1637385
SU 0.0623970 0.0476895 4.242529 0.0024239 7.0165690 0.1767005
CONF 0.0621537 0.0474424 4.224892 0.0024968 -11.6231634 0.1847326
MET 0.0618359 0.0471196 4.201861 0.0025952 -7.2376432 0.1958719
MR 0.0617517 0.0470341 4.195767 0.0026218 -7.5588944 0.1989504
FOOT 0.0614269 0.0467042 4.172251 0.0027273 31.1509094 0.2113774
MOB 0.0613973 0.0466741 4.170111 0.0027372 8.6217376 0.2125534
LP 0.0613481 0.0466241 4.166549 0.0027536 8.0864569 0.2145285
FTP 0.0608284 0.0460963 4.128971 0.0029327 13.5010333 0.2367661
DO 0.0607904 0.0460577 4.126220 0.0029463 8.8960747 0.2385010
ME 0.0607825 0.0460496 4.125648 0.0029491 -6.2944604 0.2388636
UD 0.0606216 0.0458862 4.114024 0.0030071 -12.1091142 0.2463830
FA 0.0605549 0.0458185 4.109209 0.0030315 -8.6648464 0.2495819
MT 0.0601784 0.0454361 4.082026 0.0031728 -8.2421107 0.2686411
CA 0.0599002 0.0451535 4.061948 0.0032814 8.3011337 0.2838987
DF 0.0598369 0.0450892 4.057381 0.0033066 7.4854663 0.2875213
DHL 0.0595211 0.0447684 4.034611 0.0034352 -8.1319072 0.3064961
SH 0.0586677 0.0439017 3.973163 0.0038074 4.4345159 0.3666760
MILK 0.0585384 0.0437704 3.963861 0.0038671 0.0328321 0.3771595
MDR 0.0584166 0.0436466 3.955101 0.0039243 6.0467308 0.3874240
ST 0.0578881 0.0431099 3.917120 0.0041817 -7.9781463 0.4369789
SCS 0.0576964 0.0429152 3.903356 0.0042790 -3.9614430 0.4573264
AFS 0.0575918 0.0428089 3.895845 0.0043331 -5.2064660 0.4690656
MSP 0.0575087 0.0427245 3.889880 0.0043765 -3.9533312 0.4787399
DS 0.0573283 0.0425413 3.876936 0.0044723 -4.7841746 0.5009026
WL 0.0572605 0.0424724 3.872073 0.0045088 3.4479700 0.5096796
TL 0.0572547 0.0424665 3.871655 0.0045119 -6.7563254 0.5104479
FE 0.0572054 0.0424165 3.868123 0.0045386 -3.8209975 0.5170084
DA 0.0566761 0.0418789 3.830184 0.0048356 4.3714958 0.5986609
HHE 0.0565353 0.0417358 3.820095 0.0049178 2.6941677 0.6249126
HL 0.0563327 0.0415301 3.805592 0.0050384 -3.1669335 0.6676242
LOC 0.0562600 0.0414563 3.800388 0.0050823 -3.0195770 0.6847865
FL 0.0562527 0.0414489 3.799865 0.0050868 2.8115963 0.6865743
SCK 0.0560192 0.0412117 3.783153 0.0052307 1.9379506 0.7520160
CW 0.0559471 0.0411384 3.777995 0.0052759 -1.9950607 0.7767448
FEED 0.0559443 0.0411356 3.777797 0.0052777 -1.3559978 0.7777588
BQ 0.0559195 0.0411104 3.776021 0.0052933 -1.9570851 0.7870649
TU 0.0558834 0.0410737 3.773438 0.0053162 1.0731908 0.8014578
UF 0.0558449 0.0410347 3.770689 0.0053406 2.2354069 0.8181403
DCA 0.0557115 0.0408991 3.761148 0.0054263 -1.0069789 0.8965483
BD 0.0557011 0.0408885 3.760401 0.0054331 0.8702554 0.9055126
BCS 0.0556786 0.0408657 3.758795 0.0054477 0.6193741 0.9285710
RUM 0.0556595 0.0408464 3.757433 0.0054601 -0.3154634 0.9570449
HH 0.0556561 0.0408428 3.757185 0.0054623 -0.2539972 0.9646162
Fitting Birth_Year to the model made one more trait significativally correlated with cortisol and enhanced the proportio of variation explained by the model:
  • BMR = Body Maintenance Requirements
  • CO = Cystic ovaries
  • MSL = Median Suspensory Ligament
  • CTFS = Calving to First Service
  • UT = Udder Texture
  • IH = Interdigital Hyperplasia

Below we can check the improvement in the percentage of variation in Cortisol explained by the Trait_GEBVs + Birth_Year.

CORR_BY <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv")

CORR_MIN <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv")

CORR_BY <- CORR_BY[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]

CORR_MIN <- CORR_MIN[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]

colnames(CORR_BY)[colnames(CORR_BY) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_BY"

colnames(CORR_MIN)[colnames(CORR_MIN) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_MIN"

Corr_R2_comp <- merge(CORR_MIN, CORR_BY, by.x = "Trait", by.y = "Trait")

Corr_R2_comp$Absolute_Change <- Corr_R2_comp$Adjusted_R_Squared_BY - Corr_R2_comp$Adjusted_R_Squared_MIN

write.csv(Corr_R2_comp, "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/absolute_improv_r2.csv")
X Trait Adjusted_R_Squared_Model_MIN Adjusted_R_Squared_MIN.BY Absolute_Change
1 AFS -0.0013450 0.0428089 0.0441539
2 BCS -0.0035151 0.0408657 0.0443809
3 BD -0.0038466 0.0408885 0.0447351
4 BMR 0.0363266 0.0738886 0.0375621
5 BQ -0.0037056 0.0411104 0.0448160
6 CA -0.0002239 0.0451535 0.0453774
7 CK 0.0067808 0.0524256 0.0456448
8 CO 0.0243278 0.0659161 0.0415882
9 CONF 0.0057062 0.0474424 0.0417362
10 CTFS 0.0147186 0.0559642 0.0412457
11 CW -0.0032878 0.0411384 0.0444262
12 DA -0.0031907 0.0418789 0.0450696
13 DCA -0.0038713 0.0408991 0.0447705
14 DD 0.0061790 0.0491528 0.0429738
15 DF 0.0007822 0.0450892 0.0443070
16 DHL 0.0014340 0.0447684 0.0433344
17 DO 0.0022267 0.0460577 0.0438310
18 DS 0.0004528 0.0425413 0.0420884
19 FA 0.0004168 0.0458185 0.0454018
20 FAT 0.0007623 0.0481167 0.0473544
21 FE -0.0009774 0.0424165 0.0433939
22 FEED -0.0038759 0.0411356 0.0450115
23 FL -0.0036960 0.0414489 0.0451449
24 FOOT 0.0004412 0.0467042 0.0462629
25 FTP -0.0010778 0.0460963 0.0471741
26 HH -0.0038249 0.0408428 0.0446677
27 HHE -0.0038236 0.0417358 0.0455595
28 HL -0.0025503 0.0415301 0.0440805
29 IH 0.0062855 0.0573568 0.0510713
30 LOC -0.0030185 0.0414563 0.0444747
31 LP 0.0035770 0.0466241 0.0430471
32 MDR -0.0000516 0.0436466 0.0436982
33 ME -0.0004452 0.0460496 0.0464948
34 MET 0.0032156 0.0471196 0.0439039
35 MILK -0.0038237 0.0437704 0.0475941
36 MOB 0.0008792 0.0466741 0.0457949
37 MR 0.0028254 0.0470341 0.0442086
38 MS 0.0090298 0.0540132 0.0449834
39 MSL 0.0160455 0.0589145 0.0428690
40 MSP -0.0023812 0.0427245 0.0451057
41 MT 0.0037810 0.0454361 0.0416552
42 PROT 0.0001421 0.0511577 0.0510155
43 RUM -0.0035598 0.0408464 0.0444062
44 SCK -0.0027305 0.0412117 0.0439421
45 SCS -0.0007321 0.0429152 0.0436472
46 SH 0.0004643 0.0439017 0.0434374
47 ST -0.0020640 0.0431099 0.0451738
48 SU 0.0039131 0.0476895 0.0437764
49 TL -0.0029717 0.0424665 0.0454382
50 TU -0.0036547 0.0410737 0.0447284
51 UD 0.0046866 0.0458862 0.0411996
52 UF -0.0020001 0.0410347 0.0430348
53 UT 0.0128310 0.0558310 0.0430000
54 WL -0.0000350 0.0424724 0.0425074

Adjusted_R_Squared_Model_MIN = Model without Birth Year
Adjusted_R_Squared_MIN.BY = Model adding Birth Year

10.2.4 Adding BIRTH_YEAR and SAMPLING DATE

The regression model added the BY and SAMPLING DATE is shown bellow:

\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + SAMPLING\_DATE + \epsilon \]

Where:

  • \(y\) represents Cortisol phenotypes.
  • \(GEBV_{\text{Trait}}\) denotes the GEBV for the specific trait (e.g., Milk Yield).
  • \(BIRTH\_YEAR\) is the birth year of the subjects, included as a factor.
  • \(SAMPLING\_DATE\) is the cortisol sampling date for the subjects, included as a factor.
  • \(\beta_0\) and \(\beta_1\) are the intercept and regression coefficient, respectively.
  • \(\epsilon\) represents the error term capturing unexplained variability.

The SAMPLING_DATE variable is also converted to a factor to account for the categorical nature of sampling date.

# Convert BIRTH_YEAR to a factor and rename
data_final$BIRTH_YEAR <- as.factor(data_final$BIRTH_YEAR)

# Convert Sampling_data to a factor and rename
data_final$Sampling_date <- as.factor(data_final$Sampling_date)

# Initialize a list to store the results
results_list <- list()

# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 5:ncol(data_final)) {
  trait_name <- colnames(data_final)[i]
  
  # Fit the linear regression model with BIRTH_YEAR as an additional predictor
  model <- lm(data_final[[2]] ~ data_final[[i]] + data_final$BIRTH_YEAR + data_final$Sampling_date , data = data_final)
  
  # Summarize the model
  model_summary <- summary(model)
  
  # Extract the desired statistics
  multiple_r_squared <- model_summary$r.squared
  adjusted_r_squared <- model_summary$adj.r.squared
  f_statistic <- model_summary$fstatistic[1] # F-statistic value
  f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
  f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
  p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
  
  # Extract the coefficient and its p-value for the trait
  coef_summary <- coef(model_summary)
  trait_coef <- coef_summary[2, "Estimate"]  # Assumes the trait is the second predictor
  trait_p_value <- coef_summary[2, "Pr(>|t|)"]
  
  # Combine the statistics into a data frame
  result <- data.frame(
    Trait = trait_name,
    Multiple_R_Squared = multiple_r_squared,
    Adjusted_R_Squared = adjusted_r_squared,
    F_Statistic = f_statistic,
    P_Value = p_value,
    Coefficient = trait_coef,
    Coefficient_P_Value = trait_p_value
  )
  
  # Append the result to the results list
  results_list[[i - 2]] <- result
}

# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)

# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv", row.names = FALSE)

Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR and SAMPLING_DATE

Trait Multiple_R_Squared Adjusted_R_Squared F_Statistic P_Value Coefficient Coefficient_P_Value
CO 0.3913617 0.3257757 5.967151 0 -11.1849879 0.0099861
BMR 0.3899307 0.3241904 5.931386 0 14.4612444 0.0135665
LP 0.3858330 0.3196512 5.829896 0 12.2619950 0.0330355
MILK 0.3850052 0.3187343 5.809559 0 0.0671160 0.0396652
PROT 0.3841834 0.3178238 5.789422 0 2.2166395 0.0476301
UT 0.3822859 0.3157218 5.743130 0 -12.1238483 0.0731558
CK 0.3801906 0.3134008 5.692345 0 12.7333709 0.1192726
HHE 0.3795579 0.3126999 5.677076 0 7.4003505 0.1388525
MSP 0.3793006 0.3124149 5.670876 0 -7.2388952 0.1478152
DA 0.3791391 0.3122360 5.666989 0 10.8037510 0.1537699
TU 0.3785722 0.3116080 5.653351 0 -5.2064639 0.1769405
MSL 0.3777098 0.3106526 5.632656 0 -8.6230634 0.2203509
BQ 0.3775224 0.3104450 5.628166 0 -7.7284891 0.2313766
IH 0.3772456 0.3101385 5.621542 0 -4.7354916 0.2488963
ST 0.3767980 0.3096426 5.610839 0 -9.9721644 0.2808121
LOC 0.3766890 0.3095218 5.608233 0 -7.2367609 0.2893509
FAT 0.3765584 0.3093772 5.605116 0 0.8520581 0.3000062
UD 0.3763515 0.3091480 5.600177 0 -9.3297651 0.3179533
CA 0.3757417 0.3084725 5.585641 0 6.0756898 0.3798799
MS 0.3755038 0.3082090 5.579979 0 -6.0375749 0.4085912
SCK 0.3754363 0.3081342 5.578372 0 -4.4161473 0.4173165
SCS 0.3754033 0.3080976 5.577587 0 3.9080681 0.4216769
IDD 0.3752677 0.3079474 5.574362 0 3.7029601 0.4403519
FOOT 0.3751830 0.3078536 5.572349 0 17.3309360 0.4526548
TL 0.3750803 0.3077398 5.569908 0 -6.6585204 0.4683140
MET 0.3749055 0.3075462 5.565755 0 -3.4014191 0.4970656
CTFS 0.3747600 0.3073850 5.562300 0 4.0557716 0.5233387
CONF 0.3746137 0.3072229 5.558828 0 -4.7896643 0.5523352
FE 0.3745078 0.3071056 5.556316 0 -2.9375586 0.5752627
HL 0.3743988 0.3069849 5.553731 0 3.4274416 0.6009102
FA 0.3742870 0.3068610 5.551080 0 -3.2821641 0.6298652
DD 0.3742836 0.3068573 5.551001 0 2.5402141 0.6307730
MOB 0.3742556 0.3068263 5.550337 0 3.0592146 0.6385442
FTP 0.3742359 0.3068045 5.549870 0 4.8583743 0.6441419
BCS 0.3741681 0.3067293 5.548263 0 2.6692352 0.6643615
HH 0.3740753 0.3066266 5.546066 0 2.0230249 0.6947785
DF 0.3740392 0.3065865 5.545209 0 2.3681806 0.7076996
FL 0.3739824 0.3065237 5.543865 0 -2.1711035 0.7294613
UF 0.3739605 0.3064994 5.543347 0 2.8880683 0.7384413
MDR 0.3738855 0.3064163 5.541571 0 1.8495373 0.7722558
WL 0.3738548 0.3063822 5.540843 0 1.2643028 0.7878801
AFS 0.3738293 0.3063540 5.540239 0 1.5814380 0.8018670
RUM 0.3738023 0.3063241 5.539601 0 -1.2574561 0.8179113
SH 0.3738015 0.3063233 5.539583 0 1.0342843 0.8184040
BD 0.3736958 0.3062061 5.537080 0 0.7613199 0.9071017
MR 0.3736957 0.3062060 5.537078 0 0.6291752 0.9071898
SU 0.3736871 0.3061965 5.536876 0 0.4745538 0.9186634
FEED 0.3736818 0.3061907 5.536751 0 0.3944746 0.9266753
DHL 0.3736774 0.3061858 5.536647 0 -0.5919776 0.9340753
DO 0.3736757 0.3061838 5.536604 0 0.5220723 0.9373261
DS 0.3736695 0.3061769 5.536458 0 0.3979132 0.9502668
CW 0.3736676 0.3061749 5.536414 0 0.3562368 0.9547808
ME 0.3736658 0.3061729 5.536370 0 -0.2401976 0.9599012
MT 0.3736595 0.3061659 5.536223 0 -0.0976386 0.9882642
DCA 0.3736593 0.3061657 5.536217 0 0.0802555 0.9906432
Fitting Birth_Year and Sampling_date to the model these are the traits with significant correlation:
  • CO = Cystic ovaries
  • BMR = Body Maintenance Requirements
  • LP = Lactation persistency
  • MILK = Milk yield
  • PROT = Protein yield
  • UT = Udder Texture

11 Heritability estimation - BLUPF90

11.1 Files preparation

Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.

First you need to create a directory in your home directory, prepare and save the following files in:

  • Phenotype and Fixed effects file
  • Pedigree file
  • Genotype file
  • BlupF90+ executable file
  • RenumF90 executable file
  • Parameter file

      11.1.1 Phenotype and Fixed effects file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Phenotype THIRD COLUMN = Fixed Effect 1 FOURTH COLUMN = Fixed Effect 2

      To get in one file these four columns we need the following code:

      #File with equivalence among different ids
      eq_ids <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
      
      # Genotype file with cid
      geno <- read.table("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
      
      # Phenotipic file and fixed effects
      data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")
      
      # creating a pheno file with only ID, Cortisol, BY and Sam date columns
      pheno <- data_final %>%
        select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column iid and and bring the iid from eq_ids to geno file
      pheno$iid <- eq_ids$iid[match(pheno$ID, eq_ids$elora_id)]
      
      # organizing columns sequence and keep only iid
      pheno <- pheno%>%
        select(iid, T4Cortisol, BIRTH_YEAR, Sampling_date)
      
      # Create a new column geno$iid, and bring the iid from eq_ids to geno file
      geno$iid <- eq_ids$iid[match(geno$V2, eq_ids$cdn_id)]
      
      # organizing the columns sequence
      library(dplyr)
      geno <- geno %>%
        select(V1, V2, iid, everything())
      
      # Keeping in the pheno file only the rows present also in geno file
      pheno <- pheno %>%
        filter(iid %in% geno$iid)
      
      write.table(pheno, "/home/bambrozi/2_CORTISOL/Heritability_BLUPF90/pheno_fix_eff.txt", sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE)

      The file should be saved as text file, with separation by space and no columns names.

      11.1.2 Pedigree file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Sire ID THIRD COLUMN = Dam ID

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to remove the commas of a .csv file to a file with sepation by spaces.

      # to replace comma for space in the .csv file with the equivalence among IDs
      sed -i 's/,/ /g' bruno_ids.csv

      11.1.3 Genotype file

      The appearance of this file is like this:

      My Image

      FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)

      The file should be saved as text file, with separation by space and no columns names.

      We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.

      # Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
      awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid

      11.1.4 Download the executable files

      Download from this website https://nce.ads.uga.edu/html/projects/programs/Linux/64bit/:
      • BlupF90+
      • renumF90

      11.1.5 Parameter file

      The appearance of this file is like this:

      My Image

      • DATAFILE: bellow this line you need to inform the name of the file with phenotype and fixed effects. As before running BLUPF90 on server you are going to direct the terminal to the directory where all these files are placed you only need to inform the name.
      • TRAITS: below this line you need to inform which column are the phenotype date in the previous file, in this example, 2.
      • FIELDS_PASSED TO OUTPUT:
      • WEIGHT (S):
      • RESIDUAL VARIANCE: for the firs run you need to inform the value of 1.0, for the second you can pick the variance from the firs run’s output.
      • EFFECT: you will inform your first effect, in this example, Birth Year, which is in the column 3, and the word cross numer because is a number.
      • EFFECT: you should provide the next effect, in this example, sample date, as sample date has one non numeric character you should inform as cross alpha, in this example column 4.
      • EFFECT: now I’m providing my animal ID information, in this example column 1, and again cross alpha because has number and letters in the ID. I’m also informing that this effect is RANDOM, and that is my animal effect.
      • FILE: bellow this line I need to provide the pedigree file. Again, as I’m already in the directory which contain the pedigree file I only need to provide the file name.
      • FILE-POS: Here I’ll inform which columns should be considered in the pedigree file, in this situation, 1 2 3 0 0.
      • PED_DEPTH: Now we can inform the depth we want the software considers the pedigree, or if we leave 0 it will the maximum possible.
      • (CO) VARIANCES: Here you should provide the Variance/Co-variance matrix, like as for residual variance in the first run we set up to 0 in this example that we don´t have to imagine any co-variance, but if you know that exist variance among you effects you shoul set up XXX for ….
      • OPTION method: VCE (Variance Component Estimation).
      • OPTION OrigID: this will keep the original ID informed.
      • OPTION missing 9999: you are informing that missing values will appear as 9999
      • OPTION se_covar_function: H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
        • H2_1: the name that your function will appear on the output files.
        • g_3_3: you are asking for genetic variance estimation for the 3rd informed effect.
        • **_1_1**: this effect is in the 1st column.
        • /(g_3_3_1_1+r_1_1): to get the total phenotipic variance, you are summing to genetic variance the residual variance of the effect in column 1.

11.2 Running renumF90 and BlupF90+

  1. Go to the server you wanna run this analysis, for instance, grand

  2. Now go to the directory you have created to run this analysis where that set of files are placed.

ssh grand
  1. Make the renumF90 and BlupF90+ files executables
chmod +x renumf90
chmod +x blupf90+
  1. Run renumF90
./renumf90

When you run the code above, it will as you the name of your parameter card.

renumF90 will generate a new parameter card called renf90.par

  1. Run blupf90+
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the new one renf90.par

blupF90+ will generate the blupf90.log file with the results.

My Image

Now you should update you renf90.par file with these informations from the .log file

Copy Residual Variance from blupf90.log and will paste on renf90.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90.par (CO) VARIANCE

  1. 2nd blupf90+ run
./blupf90+

blupf90+ will ask you for parameter’s card name, now you should provide with the UPDATED renf90.par

If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.

11.3 Running renumF90 and BlupF90+ adding GENOTYPES

The previous analysis considered only the pedigree, but now we can insert the genotype information. To perform this you need a new diretory called Blup_Genomic inside your previously created directory.

Now you need add the reference for your genotype file in your previous parameter file renum.par and save in this new sub-directory.

The highlighted text show the added part.

My Image

Go to the sub-diretory

Note that as you are in the subdirectory, but your phenotype and fixed effect, pedigree and genotype files are still in the previous directory you need to add the highlighted part to inform the correct location

My Image

To run the renumf90 and blupf90+ you also need to add ../ to correct specify the location. Run renumF90

./../renumf90

Run blupf90+

./../blupf90+

The steps for run, update parameter card, re-run are the same.

11.4 Results

We have 2 different output files

  1. Variance components: blupf90.log

My Image

In this file we can find the heritabilit (SD) and for instance the convergence (similarity)

  1. Solutions: solutions.orig
    In this file we will find the solutions (results) for each effect

In our example:

  • EFFECT 1: Birth Year, has 4 levels (2018, 2019, 2020 and 2021), and the solution that for this fixed effect is how much each level add.
  • EFFECT 2: Sampling date, has 23 levels, and the solutions
  • EFFECT 3: Animal random effect, has one for each animal and it is the EBV or GEBV.

    My Image